{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "import numpy as np\n",
    "from collections import deque\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.optimizers import Adam"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 594,
   "metadata": {},
   "outputs": [],
   "source": [
    "import game\n",
    "import agent\n",
    "import MCTS\n",
    "import utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 605,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<module 'utils' from 'utils.pyc'>"
      ]
     },
     "execution_count": 605,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reload(game)\n",
    "reload(agent)\n",
    "reload(MCTS)\n",
    "reload (utils)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 596,
   "metadata": {},
   "outputs": [],
   "source": [
    "EPISODES = 1\n",
    "env = game.make('XO')\n",
    "\n",
    "state_size = len(env.gameState.binary())\n",
    "action_size = len(env.actionSpace)\n",
    "\n",
    "player = agent.Agent(state_size, action_size)\n",
    "\n",
    "\n",
    "batch_size = 32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 597,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "state = env.reset()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 598,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 598,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tmp = []\n",
    "len(tmp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 599,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.0022397127)\n",
      "('PREDICTED PROBS: ', array([ 0.11277399,  0.11184875,  0.11176506,  0.11151404,  0.11135171,\n",
      "        0.11070792,  0.11025374,  0.10930624,  0.11047853], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 1, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 2, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 3, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 4, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([1, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.096750289)\n",
      "('PREDICTED PROBS: ', array([ 0.13639121,  0.12623754,  0.13234331,  0.11674145,  0.12030878,\n",
      "        0.11843675,  0.11815698,  0.1313839 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '100000000000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 1, 'P', 0.11, 'W', 0.1, 'Q', 0.1, 'U', 0.08, 'Q+U', 0.18)\n",
      "('action', 1, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 2, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 3, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 4, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([1, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 2, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('action', 4, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 5, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 8, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 1, -1,  0,  0,  0,  0,  0,  0,  0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.16966671)\n",
      "('PREDICTED PROBS: ', array([ 0.13934198,  0.15490389,  0.1292391 ,  0.1383692 ,  0.14654291,\n",
      "        0.14310853,  0.14849442], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '100000000000000000', '100000000010000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.13, 'U', 0.07, 'Q+U', 0.2)\n",
      "('action', 1, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 2, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 3, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 4, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([1, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 1, 'N', 1, 'P', 0.14, 'W', 0.17, 'Q', 0.17, 'U', 0.1, 'Q+U', 0.27)\n",
      "('action', 2, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 4, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 5, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 8, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 1, -1,  0,  0,  0,  0,  0,  0,  0]))\n",
      "('action', 2, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('action', 5, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 7, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 8, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 1, -1,  0,  1,  0,  0,  0,  0,  0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.023340043)\n",
      "('PREDICTED PROBS: ', array([ 0.16278179,  0.1618927 ,  0.1708388 ,  0.15728362,  0.17080083,\n",
      "        0.1764023 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '100000000000000000', '100000000010000000', '100100000010000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.06, 'Q+U', 0.15)\n",
      "('action', 1, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 2, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 3, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 4, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([0, 1, 0, 0, 0, 0, 0, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.032600544)\n",
      "('PREDICTED PROBS: ', array([ 0.130566  ,  0.12778154,  0.13262199,  0.12035003,  0.12608878,\n",
      "        0.12002344,  0.11856411,  0.12400412], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '010000000000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.06, 'Q+U', 0.16)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.13, 'Q+U', 0.16)\n",
      "('action', 2, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 3, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 4, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([0, 0, 1, 0, 0, 0, 0, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.062751621)\n",
      "('PREDICTED PROBS: ', array([ 0.12221663,  0.1259046 ,  0.11912103,  0.13132258,  0.12669483,\n",
      "        0.12170939,  0.12316503,  0.12986596], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '001000000000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.07, 'Q+U', 0.17)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.14, 'Q+U', 0.17)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.14, 'Q+U', 0.2)\n",
      "('action', 3, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 4, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([0, 0, 0, 1, 0, 0, 0, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.13079408)\n",
      "('PREDICTED PROBS: ', array([ 0.11388718,  0.11916115,  0.13581496,  0.12321361,  0.13752633,\n",
      "        0.12009001,  0.12131605,  0.12899074], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000100000000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.07, 'Q+U', 0.17)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.15, 'Q+U', 0.18)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.15, 'Q+U', 0.21)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.15, 'Q+U', 0.28)\n",
      "('action', 4, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([0, 0, 0, 0, 1, 0, 0, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', -0.013157197)\n",
      "('PREDICTED PROBS: ', array([ 0.124334  ,  0.12674771,  0.11863354,  0.12588774,  0.12905204,\n",
      "        0.12364981,  0.12329565,  0.12839955], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000010000000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.08, 'Q+U', 0.18)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.16, 'Q+U', 0.19)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.16, 'Q+U', 0.22)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.16, 'Q+U', 0.29)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.16, 'Q+U', 0.14)\n",
      "('action', 5, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 1, 0, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.02994795)\n",
      "('PREDICTED PROBS: ', array([ 0.12598661,  0.12698345,  0.13007449,  0.118946  ,  0.13011411,\n",
      "        0.11956299,  0.12224361,  0.1260888 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000001000000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.08, 'Q+U', 0.18)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.17, 'Q+U', 0.2)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.17, 'Q+U', 0.23)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.17, 'Q+U', 0.3)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.17, 'Q+U', 0.15)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.17, 'Q+U', 0.2)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 8, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.19064526)\n",
      "('PREDICTED PROBS: ', array([ 0.11810868,  0.12178975,  0.13709076,  0.11991346,  0.12912448,\n",
      "        0.13315654,  0.12458048,  0.11623582], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.09, 'Q+U', 0.19)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.18, 'Q+U', 0.21)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.18, 'Q+U', 0.24)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.18, 'Q+U', 0.31)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.18, 'Q+U', 0.16)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.18, 'Q+U', 0.2)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 8, 'N', 1, 'P', 0.11, 'W', 0.19, 'Q', 0.19, 'U', 0.17, 'Q+U', 0.37)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 2, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.21318115)\n",
      "('PREDICTED PROBS: ', array([ 0.13753787,  0.13934623,  0.14045218,  0.14920729,  0.15757318,\n",
      "        0.14332947,  0.13255374], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.09, 'Q+U', 0.19)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.19, 'Q+U', 0.22)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.19, 'Q+U', 0.25)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.18, 'Q+U', 0.32)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.18, 'Q+U', 0.17)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.18, 'Q+U', 0.21)\n",
      "('action', 6, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 8, 'N', 2, 'P', 0.11, 'W', 0.4, 'Q', 0.2, 'U', 0.12, 'Q+U', 0.32)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 1, 0, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', -0.0011500508)\n",
      "('PREDICTED PROBS: ', array([ 0.12690623,  0.12636235,  0.12556301,  0.12241898,  0.12920634,\n",
      "        0.12305215,  0.12066668,  0.12582424], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000100000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.1, 'Q+U', 0.19)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.19, 'Q+U', 0.23)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.19, 'Q+U', 0.26)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.19, 'Q+U', 0.32)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.19, 'Q+U', 0.18)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.19, 'Q+U', 0.22)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 7, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 8, 'N', 2, 'P', 0.11, 'W', 0.4, 'Q', 0.2, 'U', 0.13, 'Q+U', 0.33)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 1, 0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.064180352)\n",
      "('PREDICTED PROBS: ', array([ 0.12203958,  0.13031001,  0.12699561,  0.12647229,  0.12098373,\n",
      "        0.12221519,  0.12033489,  0.13064861], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000010000000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.1, 'Q+U', 0.2)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.2, 'Q+U', 0.23)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.2, 'Q+U', 0.26)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.2, 'Q+U', 0.33)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.2, 'Q+U', 0.19)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.2, 'Q+U', 0.23)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.2, 'Q+U', 0.26)\n",
      "('action', 8, 'N', 2, 'P', 0.11, 'W', 0.4, 'Q', 0.2, 'U', 0.13, 'Q+U', 0.33)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 2, 'N', 1, 'P', 0.14, 'W', 0.21, 'Q', 0.21, 'U', 0.1, 'Q+U', 0.31)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 5, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.25279593)\n",
      "('PREDICTED PROBS: ', array([ 0.16009538,  0.16686499,  0.16363367,  0.17994563,  0.16998406,\n",
      "        0.15947621], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.11, 'Q+U', 0.2)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.21, 'Q+U', 0.24)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.21, 'Q+U', 0.27)\n",
      "('action', 3, 'N', 1, 'P', 0.11, 'W', 0.13, 'Q', 0.13, 'U', 0.21, 'Q+U', 0.34)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.21, 'Q+U', 0.2)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.21, 'Q+U', 0.24)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.2, 'Q+U', 0.27)\n",
      "('action', 8, 'N', 3, 'P', 0.11, 'W', 0.66, 'Q', 0.22, 'U', 0.1, 'Q+U', 0.32)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([0, 0, 0, 1, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 0, 'P', 0.11, 'W', 0.0, 'Q', 0.0, 'U', 0.11, 'Q+U', 0.11)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 2, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 4, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 5, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 8, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  1,  0, -1,  0,  0,  0]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.14203724)\n",
      "('PREDICTED PROBS: ', array([ 0.13031943,  0.13821906,  0.15749285,  0.14188325,  0.13430142,\n",
      "        0.1416015 ,  0.15618251], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000100000000000000', '000100000000001000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.11, 'Q+U', 0.21)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.22, 'Q+U', 0.25)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.22, 'Q+U', 0.28)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.14, 'Q+U', 0.28)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.22, 'Q+U', 0.2)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.21, 'Q+U', 0.24)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.21, 'Q+U', 0.28)\n",
      "('action', 8, 'N', 3, 'P', 0.11, 'W', 0.66, 'Q', 0.22, 'U', 0.11, 'Q+U', 0.33)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 2, 'N', 2, 'P', 0.14, 'W', 0.47, 'Q', 0.23, 'U', 0.08, 'Q+U', 0.31)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 5, 'N', 1, 'P', 0.16, 'W', 0.25, 'Q', 0.25, 'U', 0.11, 'Q+U', 0.36)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 4, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.47719052)\n",
      "('PREDICTED PROBS: ', array([ 0.18611667,  0.20344944,  0.20242706,  0.21629705,  0.19170977], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.11, 'Q+U', 0.21)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.22, 'Q+U', 0.26)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.22, 'Q+U', 0.29)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.15, 'Q+U', 0.29)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.22, 'Q+U', 0.21)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.22, 'Q+U', 0.25)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.22, 'Q+U', 0.28)\n",
      "('action', 8, 'N', 4, 'P', 0.11, 'W', 1.13, 'Q', 0.28, 'U', 0.09, 'Q+U', 0.37)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 2, 'N', 3, 'P', 0.14, 'W', 0.94, 'Q', 0.31, 'U', 0.07, 'Q+U', 0.38)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 5, 'N', 2, 'P', 0.16, 'W', 0.73, 'Q', 0.36, 'U', 0.09, 'Q+U', 0.46)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 4, 'N', 1, 'P', 0.18, 'W', 0.48, 'Q', 0.48, 'U', 0.13, 'Q+U', 0.6)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 6, 'N', 0, 'P', 0.22, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.44740447)\n",
      "('PREDICTED PROBS: ', array([ 0.23970442,  0.2614927 ,  0.26077428,  0.23802863], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.12, 'Q+U', 0.21)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.23, 'Q+U', 0.26)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.23, 'Q+U', 0.29)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.15, 'Q+U', 0.29)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.23, 'Q+U', 0.22)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.23, 'Q+U', 0.26)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.23, 'Q+U', 0.29)\n",
      "('action', 8, 'N', 5, 'P', 0.11, 'W', 1.58, 'Q', 0.32, 'U', 0.08, 'Q+U', 0.39)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 2, 'N', 4, 'P', 0.14, 'W', 1.39, 'Q', 0.35, 'U', 0.06, 'Q+U', 0.41)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 5, 'N', 3, 'P', 0.16, 'W', 1.18, 'Q', 0.39, 'U', 0.08, 'Q+U', 0.47)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 4, 'N', 2, 'P', 0.18, 'W', 0.92, 'Q', 0.46, 'U', 0.1, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 6, 'N', 1, 'P', 0.22, 'W', 0.45, 'Q', 0.45, 'U', 0.15, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 1, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.66516483)\n",
      "('PREDICTED PROBS: ', array([ 0.30037597,  0.35440996,  0.34521404], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.12, 'Q+U', 0.22)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.24, 'Q+U', 0.27)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.24, 'Q+U', 0.3)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.16, 'Q+U', 0.29)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.24, 'Q+U', 0.22)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.23, 'Q+U', 0.26)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.23, 'Q+U', 0.3)\n",
      "('action', 8, 'N', 6, 'P', 0.11, 'W', 2.25, 'Q', 0.37, 'U', 0.07, 'Q+U', 0.44)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 2, 'N', 5, 'P', 0.14, 'W', 2.06, 'Q', 0.41, 'U', 0.06, 'Q+U', 0.47)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 5, 'N', 4, 'P', 0.16, 'W', 1.84, 'Q', 0.46, 'U', 0.07, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 4, 'N', 3, 'P', 0.18, 'W', 1.59, 'Q', 0.53, 'U', 0.09, 'Q+U', 0.62)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 6, 'N', 2, 'P', 0.22, 'W', 1.11, 'Q', 0.56, 'U', 0.12, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.26, 'W', 0.67, 'Q', 0.67, 'U', 0.18, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 3, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.89519179)\n",
      "('PREDICTED PROBS: ', array([ 0.42611924,  0.57388079], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.12, 'Q+U', 0.22)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.24, 'Q+U', 0.28)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.24, 'Q+U', 0.31)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.16, 'Q+U', 0.3)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.24, 'Q+U', 0.23)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.24, 'Q+U', 0.27)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.24, 'Q+U', 0.3)\n",
      "('action', 8, 'N', 7, 'P', 0.11, 'W', 3.14, 'Q', 0.45, 'U', 0.06, 'Q+U', 0.51)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 2, 'N', 6, 'P', 0.14, 'W', 2.95, 'Q', 0.49, 'U', 0.05, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 5, 'N', 5, 'P', 0.16, 'W', 2.74, 'Q', 0.55, 'U', 0.06, 'Q+U', 0.61)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 4, 'N', 4, 'P', 0.18, 'W', 2.48, 'Q', 0.62, 'U', 0.08, 'Q+U', 0.7)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 6, 'N', 3, 'P', 0.22, 'W', 2.01, 'Q', 0.67, 'U', 0.11, 'Q+U', 0.78)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 1, 'N', 2, 'P', 0.26, 'W', 1.56, 'Q', 0.78, 'U', 0.15, 'Q+U', 0.93)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 3, 'N', 1, 'P', 0.35, 'W', 0.9, 'Q', 0.9, 'U', 0.25, 'Q+U', 1.15)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 7, 'N', 0, 'P', 0.57, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.13, 'Q+U', 0.22)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.25, 'Q+U', 0.28)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.25, 'Q+U', 0.31)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.17, 'Q+U', 0.3)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.25, 'Q+U', 0.24)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.25, 'Q+U', 0.28)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.24, 'Q+U', 0.31)\n",
      "('action', 8, 'N', 8, 'P', 0.11, 'W', 4.14, 'Q', 0.52, 'U', 0.05, 'Q+U', 0.57)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 2, 'N', 7, 'P', 0.14, 'W', 3.95, 'Q', 0.56, 'U', 0.05, 'Q+U', 0.61)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 5, 'N', 6, 'P', 0.16, 'W', 3.74, 'Q', 0.62, 'U', 0.06, 'Q+U', 0.68)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 4, 'N', 5, 'P', 0.18, 'W', 3.48, 'Q', 0.7, 'U', 0.07, 'Q+U', 0.77)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 6, 'N', 4, 'P', 0.22, 'W', 3.01, 'Q', 0.75, 'U', 0.1, 'Q+U', 0.85)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 1, 'N', 3, 'P', 0.26, 'W', 2.56, 'Q', 0.85, 'U', 0.13, 'Q+U', 0.98)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 3, 'N', 2, 'P', 0.35, 'W', 1.9, 'Q', 0.95, 'U', 0.2, 'Q+U', 1.15)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 1, 'P', 0.57, 'W', 1.0, 'Q', 1.0, 'U', 0.41, 'Q+U', 1.41)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.13, 'Q+U', 0.23)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.26, 'Q+U', 0.29)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.26, 'Q+U', 0.32)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.17, 'Q+U', 0.31)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.26, 'Q+U', 0.24)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.25, 'Q+U', 0.28)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.25, 'Q+U', 0.31)\n",
      "('action', 8, 'N', 9, 'P', 0.11, 'W', 5.14, 'Q', 0.57, 'U', 0.05, 'Q+U', 0.62)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 2, 'N', 8, 'P', 0.14, 'W', 4.95, 'Q', 0.62, 'U', 0.05, 'Q+U', 0.66)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 5, 'N', 7, 'P', 0.16, 'W', 4.74, 'Q', 0.68, 'U', 0.06, 'Q+U', 0.73)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 4, 'N', 6, 'P', 0.18, 'W', 4.48, 'Q', 0.75, 'U', 0.07, 'Q+U', 0.82)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 6, 'N', 5, 'P', 0.22, 'W', 4.01, 'Q', 0.8, 'U', 0.09, 'Q+U', 0.89)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 1, 'N', 4, 'P', 0.26, 'W', 3.56, 'Q', 0.89, 'U', 0.12, 'Q+U', 1.01)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 3, 'P', 0.35, 'W', 2.9, 'Q', 0.97, 'U', 0.18, 'Q+U', 1.14)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 2, 'P', 0.57, 'W', 2.0, 'Q', 1.0, 'U', 0.33, 'Q+U', 1.33)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.13, 'Q+U', 0.23)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.26, 'Q+U', 0.29)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.26, 'Q+U', 0.32)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.17, 'Q+U', 0.31)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.26, 'Q+U', 0.25)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.26, 'Q+U', 0.29)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.26, 'Q+U', 0.32)\n",
      "('action', 8, 'N', 10, 'P', 0.11, 'W', 6.14, 'Q', 0.61, 'U', 0.05, 'Q+U', 0.66)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 2, 'N', 9, 'P', 0.14, 'W', 5.95, 'Q', 0.66, 'U', 0.04, 'Q+U', 0.7)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 5, 'N', 8, 'P', 0.16, 'W', 5.74, 'Q', 0.72, 'U', 0.05, 'Q+U', 0.77)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 7, 'P', 0.18, 'W', 5.48, 'Q', 0.78, 'U', 0.06, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 6, 'P', 0.22, 'W', 5.01, 'Q', 0.83, 'U', 0.08, 'Q+U', 0.92)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 5, 'P', 0.26, 'W', 4.56, 'Q', 0.91, 'U', 0.11, 'Q+U', 1.02)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 3, 'N', 4, 'P', 0.35, 'W', 3.9, 'Q', 0.97, 'U', 0.16, 'Q+U', 1.13)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 7, 'N', 3, 'P', 0.57, 'W', 3.0, 'Q', 1.0, 'U', 0.29, 'Q+U', 1.29)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.14, 'Q+U', 0.23)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.27, 'Q+U', 0.3)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.27, 'Q+U', 0.33)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.18, 'Q+U', 0.31)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.27, 'Q+U', 0.25)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.27, 'Q+U', 0.3)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.26, 'Q+U', 0.33)\n",
      "('action', 8, 'N', 11, 'P', 0.11, 'W', 7.14, 'Q', 0.65, 'U', 0.04, 'Q+U', 0.69)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 2, 'N', 10, 'P', 0.14, 'W', 6.95, 'Q', 0.7, 'U', 0.04, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 5, 'N', 9, 'P', 0.16, 'W', 6.74, 'Q', 0.75, 'U', 0.05, 'Q+U', 0.8)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 8, 'P', 0.18, 'W', 6.48, 'Q', 0.81, 'U', 0.06, 'Q+U', 0.87)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 7, 'P', 0.22, 'W', 6.01, 'Q', 0.86, 'U', 0.08, 'Q+U', 0.93)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 6, 'P', 0.26, 'W', 5.56, 'Q', 0.93, 'U', 0.1, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 5, 'P', 0.35, 'W', 4.9, 'Q', 0.98, 'U', 0.14, 'Q+U', 1.12)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 7, 'N', 4, 'P', 0.57, 'W', 4.0, 'Q', 1.0, 'U', 0.26, 'Q+U', 1.26)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.14, 'Q+U', 0.23)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.27, 'Q+U', 0.31)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.27, 'Q+U', 0.34)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.18, 'Q+U', 0.32)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.27, 'Q+U', 0.26)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.27, 'Q+U', 0.3)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.27, 'Q+U', 0.33)\n",
      "('action', 8, 'N', 12, 'P', 0.11, 'W', 8.14, 'Q', 0.68, 'U', 0.04, 'Q+U', 0.72)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 2, 'N', 11, 'P', 0.14, 'W', 7.95, 'Q', 0.72, 'U', 0.04, 'Q+U', 0.76)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 5, 'N', 10, 'P', 0.16, 'W', 7.74, 'Q', 0.77, 'U', 0.05, 'Q+U', 0.82)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 4, 'N', 9, 'P', 0.18, 'W', 7.48, 'Q', 0.83, 'U', 0.06, 'Q+U', 0.89)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 6, 'N', 8, 'P', 0.22, 'W', 7.01, 'Q', 0.88, 'U', 0.07, 'Q+U', 0.95)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 7, 'P', 0.26, 'W', 6.56, 'Q', 0.94, 'U', 0.09, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 3, 'N', 6, 'P', 0.35, 'W', 5.9, 'Q', 0.98, 'U', 0.13, 'Q+U', 1.12)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 7, 'N', 5, 'P', 0.57, 'W', 5.0, 'Q', 1.0, 'U', 0.23, 'Q+U', 1.23)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.14, 'Q+U', 0.24)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.28, 'Q+U', 0.31)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.28, 'Q+U', 0.34)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.19, 'Q+U', 0.32)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.28, 'Q+U', 0.27)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.28, 'Q+U', 0.31)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.28, 'Q+U', 0.27)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.27, 'Q+U', 0.34)\n",
      "('action', 8, 'N', 13, 'P', 0.11, 'W', 9.14, 'Q', 0.7, 'U', 0.04, 'Q+U', 0.74)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 2, 'N', 12, 'P', 0.14, 'W', 8.95, 'Q', 0.75, 'U', 0.04, 'Q+U', 0.78)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 11, 'P', 0.16, 'W', 8.74, 'Q', 0.79, 'U', 0.05, 'Q+U', 0.84)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 4, 'N', 10, 'P', 0.18, 'W', 8.48, 'Q', 0.85, 'U', 0.05, 'Q+U', 0.9)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 6, 'N', 9, 'P', 0.22, 'W', 8.01, 'Q', 0.89, 'U', 0.07, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 8, 'P', 0.26, 'W', 7.56, 'Q', 0.95, 'U', 0.09, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 7, 'P', 0.35, 'W', 6.9, 'Q', 0.99, 'U', 0.13, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.98, 'Q+U', 0.98)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 1.13, 'Q+U', 1.13)\n",
      "('action', 7, 'N', 6, 'P', 0.57, 'W', 6.0, 'Q', 1.0, 'U', 0.22, 'Q+U', 1.22)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.14, 'Q+U', 0.24)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.29, 'Q+U', 0.32)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.28, 'Q+U', 0.35)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.19, 'Q+U', 0.33)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.28, 'Q+U', 0.27)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.28, 'Q+U', 0.31)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.28, 'Q+U', 0.34)\n",
      "('action', 8, 'N', 14, 'P', 0.11, 'W', 10.14, 'Q', 0.72, 'U', 0.04, 'Q+U', 0.76)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 2, 'N', 13, 'P', 0.14, 'W', 9.95, 'Q', 0.77, 'U', 0.04, 'Q+U', 0.8)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 5, 'N', 12, 'P', 0.16, 'W', 9.74, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 4, 'N', 11, 'P', 0.18, 'W', 9.48, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 6, 'N', 10, 'P', 0.22, 'W', 9.01, 'Q', 0.9, 'U', 0.07, 'Q+U', 0.97)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 1, 'N', 9, 'P', 0.26, 'W', 8.56, 'Q', 0.95, 'U', 0.08, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 3, 'N', 8, 'P', 0.35, 'W', 7.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 1.21, 'Q+U', 1.21)\n",
      "('action', 7, 'N', 7, 'P', 0.57, 'W', 7.0, 'Q', 1.0, 'U', 0.2, 'Q+U', 1.2)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([-1, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101111010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.15, 'Q+U', 0.24)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.29, 'Q+U', 0.32)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.29, 'Q+U', 0.35)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.19, 'Q+U', 0.33)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.29, 'Q+U', 0.28)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.29, 'Q+U', 0.32)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.28, 'Q+U', 0.35)\n",
      "('action', 8, 'N', 15, 'P', 0.11, 'W', 11.14, 'Q', 0.74, 'U', 0.04, 'Q+U', 0.78)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 2, 'N', 14, 'P', 0.14, 'W', 10.95, 'Q', 0.78, 'U', 0.04, 'Q+U', 0.82)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 5, 'N', 13, 'P', 0.16, 'W', 10.74, 'Q', 0.83, 'U', 0.04, 'Q+U', 0.87)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 4, 'N', 12, 'P', 0.18, 'W', 10.48, 'Q', 0.87, 'U', 0.05, 'Q+U', 0.92)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 6, 'N', 11, 'P', 0.22, 'W', 10.01, 'Q', 0.91, 'U', 0.06, 'Q+U', 0.97)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 1, 'N', 10, 'P', 0.26, 'W', 9.56, 'Q', 0.96, 'U', 0.08, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 3, 'N', 9, 'P', 0.35, 'W', 8.9, 'Q', 0.99, 'U', 0.11, 'Q+U', 1.1)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 1.09, 'Q+U', 1.09)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.43, 'W', 1.0, 'Q', 1.0, 'U', 0.64, 'Q+U', 1.64)\n",
      "('action', 7, 'N', 7, 'P', 0.57, 'W', 7.0, 'Q', 1.0, 'U', 0.22, 'Q+U', 1.22)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([-1, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101111010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.15, 'Q+U', 0.25)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.3, 'Q+U', 0.33)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.3, 'Q+U', 0.36)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.2, 'Q+U', 0.33)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.29, 'Q+U', 0.28)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.29, 'Q+U', 0.32)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.29, 'Q+U', 0.35)\n",
      "('action', 8, 'N', 16, 'P', 0.11, 'W', 12.14, 'Q', 0.76, 'U', 0.03, 'Q+U', 0.79)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 2, 'N', 15, 'P', 0.14, 'W', 11.95, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 5, 'N', 14, 'P', 0.16, 'W', 11.74, 'Q', 0.84, 'U', 0.04, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 4, 'N', 13, 'P', 0.18, 'W', 11.48, 'Q', 0.88, 'U', 0.05, 'Q+U', 0.93)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 6, 'N', 12, 'P', 0.22, 'W', 11.01, 'Q', 0.92, 'U', 0.06, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 1, 'N', 11, 'P', 0.26, 'W', 10.56, 'Q', 0.96, 'U', 0.08, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.0, 'Q+U', 1.0)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.11, 'Q+U', 1.1)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 1.14, 'Q+U', 1.14)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.15, 'Q+U', 0.25)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.3, 'Q+U', 0.33)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.3, 'Q+U', 0.36)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.2, 'Q+U', 0.34)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.3, 'Q+U', 0.29)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.3, 'Q+U', 0.33)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.29, 'Q+U', 0.36)\n",
      "('action', 8, 'N', 17, 'P', 0.11, 'W', 13.14, 'Q', 0.77, 'U', 0.03, 'Q+U', 0.81)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 2, 'N', 16, 'P', 0.14, 'W', 12.95, 'Q', 0.81, 'U', 0.03, 'Q+U', 0.84)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 5, 'N', 15, 'P', 0.16, 'W', 12.74, 'Q', 0.85, 'U', 0.04, 'Q+U', 0.89)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 4, 'N', 14, 'P', 0.18, 'W', 12.48, 'Q', 0.89, 'U', 0.05, 'Q+U', 0.94)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 6, 'N', 13, 'P', 0.22, 'W', 12.01, 'Q', 0.92, 'U', 0.06, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 1, 'N', 12, 'P', 0.26, 'W', 11.56, 'Q', 0.96, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.94, 'Q+U', 0.94)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.11, 'Q+U', 1.1)\n",
      "('action', 7, 'N', 1, 'P', 0.35, 'W', 1.0, 'Q', 1.0, 'U', 0.6, 'Q+U', 1.6)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.15, 'Q+U', 0.25)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.31, 'Q+U', 0.34)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.31, 'Q+U', 0.37)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.2, 'Q+U', 0.34)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.3, 'Q+U', 0.29)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.3, 'Q+U', 0.33)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.3, 'Q+U', 0.36)\n",
      "('action', 8, 'N', 18, 'P', 0.11, 'W', 14.14, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.82)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 2, 'N', 17, 'P', 0.14, 'W', 13.95, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 5, 'N', 16, 'P', 0.16, 'W', 13.74, 'Q', 0.86, 'U', 0.04, 'Q+U', 0.9)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 4, 'N', 15, 'P', 0.18, 'W', 13.48, 'Q', 0.9, 'U', 0.04, 'Q+U', 0.94)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 14, 'P', 0.22, 'W', 13.01, 'Q', 0.93, 'U', 0.06, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 1, 'N', 13, 'P', 0.26, 'W', 12.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.98, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.89, 'Q+U', 0.89)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.08, 'Q+U', 1.08)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 2, 'P', 0.35, 'W', 2.0, 'Q', 1.0, 'U', 0.41, 'Q+U', 1.41)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.16, 'Q+U', 0.25)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.31, 'Q+U', 0.34)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.31, 'Q+U', 0.37)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.21, 'Q+U', 0.34)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.31, 'Q+U', 0.3)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.31, 'Q+U', 0.34)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.3, 'Q+U', 0.37)\n",
      "('action', 8, 'N', 19, 'P', 0.11, 'W', 15.14, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 2, 'N', 18, 'P', 0.14, 'W', 14.95, 'Q', 0.83, 'U', 0.03, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 5, 'N', 17, 'P', 0.16, 'W', 14.74, 'Q', 0.87, 'U', 0.04, 'Q+U', 0.9)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 4, 'N', 16, 'P', 0.18, 'W', 14.48, 'Q', 0.91, 'U', 0.04, 'Q+U', 0.95)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 6, 'N', 15, 'P', 0.22, 'W', 14.01, 'Q', 0.93, 'U', 0.05, 'Q+U', 0.99)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.93, 'Q+U', 0.93)\n",
      "('action', 1, 'N', 14, 'P', 0.26, 'W', 13.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 1.01, 'Q+U', 1.01)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.92, 'Q+U', 0.92)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.12, 'Q+U', 1.12)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 3, 'P', 0.35, 'W', 3.0, 'Q', 1.0, 'U', 0.32, 'Q+U', 1.32)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.16, 'Q+U', 0.26)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.32, 'Q+U', 0.35)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.32, 'Q+U', 0.38)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.21, 'Q+U', 0.35)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.31, 'Q+U', 0.3)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.31, 'Q+U', 0.34)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.31, 'Q+U', 0.37)\n",
      "('action', 8, 'N', 20, 'P', 0.11, 'W', 16.14, 'Q', 0.81, 'U', 0.03, 'Q+U', 0.84)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 2, 'N', 19, 'P', 0.14, 'W', 15.95, 'Q', 0.84, 'U', 0.03, 'Q+U', 0.87)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 5, 'N', 18, 'P', 0.16, 'W', 15.74, 'Q', 0.87, 'U', 0.04, 'Q+U', 0.91)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 4, 'N', 17, 'P', 0.18, 'W', 15.48, 'Q', 0.91, 'U', 0.04, 'Q+U', 0.95)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 16, 'P', 0.22, 'W', 15.01, 'Q', 0.94, 'U', 0.05, 'Q+U', 0.99)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.96, 'Q+U', 0.96)\n",
      "('action', 1, 'N', 15, 'P', 0.26, 'W', 14.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1, -1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37307718)\n",
      "('PREDICTED PROBS: ', array([ 0.3413097 ,  0.34924269,  0.30944762], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101001110000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.16, 'Q+U', 0.26)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.32, 'Q+U', 0.35)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.32, 'Q+U', 0.38)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.21, 'Q+U', 0.35)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.32, 'Q+U', 0.31)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.32, 'Q+U', 0.35)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.31, 'Q+U', 0.38)\n",
      "('action', 8, 'N', 21, 'P', 0.11, 'W', 16.51, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.82)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 2, 'N', 20, 'P', 0.14, 'W', 16.32, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 5, 'N', 19, 'P', 0.16, 'W', 16.11, 'Q', 0.85, 'U', 0.04, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 4, 'N', 18, 'P', 0.18, 'W', 15.86, 'Q', 0.88, 'U', 0.04, 'Q+U', 0.92)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 17, 'P', 0.22, 'W', 15.38, 'Q', 0.9, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.99, 'Q+U', 0.99)\n",
      "('action', 1, 'N', 15, 'P', 0.26, 'W', 14.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.54, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.98, 'Q+U', 0.98)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.16, 'Q+U', 1.16)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 4, 'P', 0.35, 'W', 4.0, 'Q', 1.0, 'U', 0.27, 'Q+U', 1.27)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.16, 'Q+U', 0.26)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.33, 'Q+U', 0.36)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.33, 'Q+U', 0.39)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.22, 'Q+U', 0.35)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.32, 'Q+U', 0.31)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.32, 'Q+U', 0.35)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.32, 'Q+U', 0.38)\n",
      "('action', 8, 'N', 22, 'P', 0.11, 'W', 17.51, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.82)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 2, 'N', 21, 'P', 0.14, 'W', 17.32, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 5, 'N', 20, 'P', 0.16, 'W', 17.11, 'Q', 0.86, 'U', 0.03, 'Q+U', 0.89)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 4, 'N', 19, 'P', 0.18, 'W', 16.86, 'Q', 0.89, 'U', 0.04, 'Q+U', 0.93)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.89, 'Q+U', 0.89)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 18, 'P', 0.22, 'W', 16.38, 'Q', 0.91, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.02, 'Q+U', 1.02)\n",
      "('action', 1, 'N', 16, 'P', 0.26, 'W', 15.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.55, 'Q+U', 0.93)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.01, 'Q+U', 1.01)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.2, 'Q+U', 1.2)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.13, 'Q+U', 1.12)\n",
      "('action', 7, 'N', 5, 'P', 0.35, 'W', 5.0, 'Q', 1.0, 'U', 0.23, 'Q+U', 1.23)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.17, 'Q+U', 0.26)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.33, 'Q+U', 0.36)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.33, 'Q+U', 0.39)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.22, 'Q+U', 0.36)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.33, 'Q+U', 0.32)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.33, 'Q+U', 0.36)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.33, 'Q+U', 0.32)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.32, 'Q+U', 0.39)\n",
      "('action', 8, 'N', 23, 'P', 0.11, 'W', 18.51, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 2, 'N', 22, 'P', 0.14, 'W', 18.32, 'Q', 0.83, 'U', 0.03, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 5, 'N', 21, 'P', 0.16, 'W', 18.11, 'Q', 0.86, 'U', 0.03, 'Q+U', 0.9)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 4, 'N', 20, 'P', 0.18, 'W', 17.86, 'Q', 0.89, 'U', 0.04, 'Q+U', 0.93)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('action', 6, 'N', 19, 'P', 0.22, 'W', 17.38, 'Q', 0.91, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 1, 'N', 17, 'P', 0.26, 'W', 16.56, 'Q', 0.97, 'U', 0.06, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.57, 'Q+U', 0.94)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([-1,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37860429)\n",
      "('PREDICTED PROBS: ', array([ 0.34713188,  0.34446198,  0.3084062 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101101010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.17, 'Q+U', 0.27)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.34, 'Q+U', 0.37)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.34, 'Q+U', 0.4)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.22, 'Q+U', 0.36)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.33, 'Q+U', 0.32)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.33, 'Q+U', 0.36)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.33, 'Q+U', 0.39)\n",
      "('action', 8, 'N', 24, 'P', 0.11, 'W', 18.89, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.81)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 2, 'N', 23, 'P', 0.14, 'W', 18.7, 'Q', 0.81, 'U', 0.03, 'Q+U', 0.84)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 5, 'N', 22, 'P', 0.16, 'W', 18.49, 'Q', 0.84, 'U', 0.03, 'Q+U', 0.87)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 4, 'N', 21, 'P', 0.18, 'W', 18.24, 'Q', 0.87, 'U', 0.04, 'Q+U', 0.91)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.93, 'Q+U', 0.93)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.93, 'Q+U', 0.93)\n",
      "('action', 6, 'N', 20, 'P', 0.22, 'W', 17.76, 'Q', 0.89, 'U', 0.05, 'Q+U', 0.94)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.24, 'W', 0.38, 'Q', 0.38, 'U', 0.54, 'Q+U', 0.91)\n",
      "('action', 1, 'N', 17, 'P', 0.26, 'W', 16.56, 'Q', 0.97, 'U', 0.06, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.58, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.06, 'Q+U', 1.06)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.32278523)\n",
      "('PREDICTED PROBS: ', array([ 0.3193782 ,  0.33569166,  0.3449302 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000', '000001101001010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.17, 'Q+U', 0.27)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.34, 'Q+U', 0.37)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.34, 'Q+U', 0.4)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.23, 'Q+U', 0.36)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.34, 'Q+U', 0.33)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.34, 'Q+U', 0.37)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.34, 'Q+U', 0.33)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.33, 'Q+U', 0.4)\n",
      "('action', 8, 'N', 25, 'P', 0.11, 'W', 19.22, 'Q', 0.77, 'U', 0.03, 'Q+U', 0.79)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 2, 'N', 24, 'P', 0.14, 'W', 19.03, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.82)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 5, 'N', 23, 'P', 0.16, 'W', 18.81, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 4, 'N', 22, 'P', 0.18, 'W', 18.56, 'Q', 0.84, 'U', 0.04, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.87, 'Q+U', 0.87)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 6, 'N', 21, 'P', 0.22, 'W', 18.08, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0,  1, -1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.45065233)\n",
      "('PREDICTED PROBS: ', array([ 0.24604604,  0.25497061,  0.25636229,  0.24262106], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '010001001001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.17, 'Q+U', 0.27)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.34, 'Q+U', 0.38)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.34, 'Q+U', 0.41)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.23, 'Q+U', 0.37)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.34, 'Q+U', 0.33)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.34, 'Q+U', 0.37)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.34, 'Q+U', 0.4)\n",
      "('action', 8, 'N', 26, 'P', 0.11, 'W', 19.67, 'Q', 0.76, 'U', 0.03, 'Q+U', 0.78)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 2, 'N', 25, 'P', 0.14, 'W', 19.48, 'Q', 0.78, 'U', 0.03, 'Q+U', 0.81)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 5, 'N', 24, 'P', 0.16, 'W', 19.26, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 4, 'N', 23, 'P', 0.18, 'W', 19.01, 'Q', 0.83, 'U', 0.04, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.89, 'Q+U', 0.89)\n",
      "('action', 1, 'N', 1, 'P', 0.2, 'W', 0.45, 'Q', 0.45, 'U', 0.49, 'Q+U', 0.94)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.97, 'Q+U', 0.97)\n",
      "('action', 6, 'N', 21, 'P', 0.22, 'W', 18.08, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.92, 'Q+U', 0.92)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.71427965)\n",
      "('PREDICTED PROBS: ', array([ 0.21108095,  0.26273152,  0.27584106,  0.25034645], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000101001001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.18, 'Q+U', 0.27)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.35, 'Q+U', 0.38)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.35, 'Q+U', 0.41)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.23, 'Q+U', 0.37)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.35, 'Q+U', 0.33)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.35, 'Q+U', 0.38)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.34, 'Q+U', 0.41)\n",
      "('action', 8, 'N', 27, 'P', 0.11, 'W', 20.38, 'Q', 0.75, 'U', 0.02, 'Q+U', 0.78)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 2, 'N', 26, 'P', 0.14, 'W', 20.19, 'Q', 0.78, 'U', 0.03, 'Q+U', 0.8)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 5, 'N', 25, 'P', 0.16, 'W', 19.98, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 4, 'N', 24, 'P', 0.18, 'W', 19.72, 'Q', 0.82, 'U', 0.04, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('action', 1, 'N', 1, 'P', 0.2, 'W', 0.45, 'Q', 0.45, 'U', 0.5, 'Q+U', 0.95)\n",
      "('action', 3, 'N', 1, 'P', 0.2, 'W', 0.71, 'Q', 0.71, 'U', 0.5, 'Q+U', 1.21)\n",
      "('action', 6, 'N', 21, 'P', 0.22, 'W', 18.08, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.94, 'Q+U', 0.94)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 1, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 6, 'N', 0, 'P', 0.28, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 7, 'N', 0, 'P', 0.25, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1, -1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.68630624)\n",
      "('PREDICTED PROBS: ', array([ 0.29034367,  0.36074233,  0.34891403], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000101001001010000', '000101001001010100'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.18, 'Q+U', 0.27)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.35, 'Q+U', 0.39)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.35, 'Q+U', 0.42)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.24, 'Q+U', 0.37)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.35, 'Q+U', 0.34)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.35, 'Q+U', 0.38)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.35, 'Q+U', 0.41)\n",
      "('action', 8, 'N', 28, 'P', 0.11, 'W', 21.07, 'Q', 0.75, 'U', 0.02, 'Q+U', 0.78)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 2, 'N', 27, 'P', 0.14, 'W', 20.88, 'Q', 0.77, 'U', 0.03, 'Q+U', 0.8)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 5, 'N', 26, 'P', 0.16, 'W', 20.66, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 4, 'N', 25, 'P', 0.18, 'W', 20.41, 'Q', 0.82, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.87, 'Q+U', 0.87)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1, -1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.22406963)\n",
      "('PREDICTED PROBS: ', array([ 0.19701564,  0.20021626,  0.20122319,  0.21086656,  0.19067836], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001000100'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.18, 'Q+U', 0.28)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.36, 'Q+U', 0.39)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.36, 'Q+U', 0.42)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.24, 'Q+U', 0.37)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.36, 'Q+U', 0.34)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.35, 'Q+U', 0.38)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.35, 'Q+U', 0.41)\n",
      "('action', 8, 'N', 29, 'P', 0.11, 'W', 21.29, 'Q', 0.73, 'U', 0.02, 'Q+U', 0.76)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 2, 'N', 28, 'P', 0.14, 'W', 21.1, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 5, 'N', 27, 'P', 0.16, 'W', 20.89, 'Q', 0.77, 'U', 0.03, 'Q+U', 0.8)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 1, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.87, 'Q+U', 0.87)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 4, 'N', 25, 'P', 0.18, 'W', 20.41, 'Q', 0.82, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.22, 'Q', 0.22, 'U', 0.44, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0,  0,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.44127065)\n",
      "('PREDICTED PROBS: ', array([ 0.18272465,  0.19099656,  0.23179895,  0.20274812,  0.19173171], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001011000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.18, 'Q+U', 0.28)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.36, 'Q+U', 0.4)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.36, 'Q+U', 0.42)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.24, 'Q+U', 0.38)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.36, 'Q+U', 0.35)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.36, 'Q+U', 0.39)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.35, 'Q+U', 0.42)\n",
      "('action', 8, 'N', 30, 'P', 0.11, 'W', 21.73, 'Q', 0.72, 'U', 0.02, 'Q+U', 0.75)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 2, 'N', 29, 'P', 0.14, 'W', 21.54, 'Q', 0.74, 'U', 0.03, 'Q+U', 0.77)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 5, 'N', 28, 'P', 0.16, 'W', 21.33, 'Q', 0.76, 'U', 0.03, 'Q+U', 0.79)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0,  1,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.065484203)\n",
      "('PREDICTED PROBS: ', array([ 0.16539781,  0.16856168,  0.1664065 ,  0.18515663,  0.15908659,\n",
      "        0.15539078], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000010001001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.18, 'Q+U', 0.28)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.37, 'Q+U', 0.4)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.37, 'Q+U', 0.43)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.24, 'Q+U', 0.38)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.37, 'Q+U', 0.35)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.36, 'Q+U', 0.39)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.36, 'Q+U', 0.42)\n",
      "('action', 8, 'N', 31, 'P', 0.11, 'W', 21.8, 'Q', 0.7, 'U', 0.02, 'Q+U', 0.73)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 2, 'N', 30, 'P', 0.14, 'W', 21.61, 'Q', 0.72, 'U', 0.02, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.41, 'Q+U', 0.47)\n",
      "('action', 5, 'N', 28, 'P', 0.16, 'W', 21.33, 'Q', 0.76, 'U', 0.03, 'Q+U', 0.79)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 1, 'N', 1, 'P', 0.17, 'W', 0.44, 'Q', 0.44, 'U', 0.44, 'Q+U', 0.88)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.87, 'Q+U', 0.87)\n",
      "('action', 4, 'N', 25, 'P', 0.18, 'W', 20.41, 'Q', 0.82, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.22, 'Q', 0.22, 'U', 0.45, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 3, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 4, 'N', 0, 'P', 0.23, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 6, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0, -1, -1,  0,  1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.39323422)\n",
      "('PREDICTED PROBS: ', array([ 0.23102276,  0.24902961,  0.26901194,  0.2509357 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001011000000', '000011001011000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.19, 'Q+U', 0.28)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.37, 'Q+U', 0.4)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.37, 'Q+U', 0.43)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.25, 'Q+U', 0.38)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.37, 'Q+U', 0.36)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.37, 'Q+U', 0.4)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.37, 'Q+U', 0.36)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.36, 'Q+U', 0.43)\n",
      "('action', 8, 'N', 32, 'P', 0.11, 'W', 22.19, 'Q', 0.69, 'U', 0.02, 'Q+U', 0.72)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 2, 'N', 31, 'P', 0.14, 'W', 22.0, 'Q', 0.71, 'U', 0.02, 'Q+U', 0.73)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 5, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0,  0, -1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.14561233)\n",
      "('PREDICTED PROBS: ', array([ 0.13681309,  0.14126484,  0.15138456,  0.13996866,  0.14918846,\n",
      "        0.14370428,  0.13767612], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000001000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.19, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.38, 'Q+U', 0.41)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.37, 'Q+U', 0.44)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.25, 'Q+U', 0.39)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.37, 'Q+U', 0.36)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.37, 'Q+U', 0.4)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.37, 'Q+U', 0.43)\n",
      "('action', 8, 'N', 33, 'P', 0.11, 'W', 22.34, 'Q', 0.68, 'U', 0.02, 'Q+U', 0.7)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 2, 'N', 31, 'P', 0.14, 'W', 22.0, 'Q', 0.71, 'U', 0.02, 'Q+U', 0.73)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 4, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.38, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37868366)\n",
      "('PREDICTED PROBS: ', array([ 0.12436685,  0.13577549,  0.1655677 ,  0.13306893,  0.16756038,\n",
      "        0.14673129,  0.12692931], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.19, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.38, 'Q+U', 0.41)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.38, 'Q+U', 0.44)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.25, 'Q+U', 0.39)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.38, 'Q+U', 0.36)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.38, 'Q+U', 0.41)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.37, 'Q+U', 0.43)\n",
      "('action', 8, 'N', 34, 'P', 0.11, 'W', 22.72, 'Q', 0.67, 'U', 0.02, 'Q+U', 0.69)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 2, 'N', 31, 'P', 0.14, 'W', 22.0, 'Q', 0.71, 'U', 0.02, 'Q+U', 0.73)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 4, 'N', 1, 'P', 0.13, 'W', 0.38, 'Q', 0.38, 'U', 0.38, 'Q+U', 0.76)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.39, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('action', 5, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.40192074)\n",
      "('PREDICTED PROBS: ', array([ 0.14380488,  0.16388261,  0.21290533,  0.15055078,  0.17281103,\n",
      "        0.15604539], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.19, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.38, 'Q+U', 0.42)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.38, 'Q+U', 0.45)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.25, 'Q+U', 0.39)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.38, 'Q+U', 0.37)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.38, 'Q+U', 0.41)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.37, 'Q+U', 0.44)\n",
      "('action', 8, 'N', 35, 'P', 0.11, 'W', 23.12, 'Q', 0.66, 'U', 0.02, 'Q+U', 0.68)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 2, 'N', 31, 'P', 0.14, 'W', 22.0, 'Q', 0.71, 'U', 0.03, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.25, 'Q+U', 0.64)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.39, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0,  0,  0,  0,  0, -1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.15460967)\n",
      "('PREDICTED PROBS: ', array([ 0.13793901,  0.13897049,  0.15236102,  0.13987085,  0.14624415,\n",
      "        0.1507524 ,  0.13386206], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000000100'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.2, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.39, 'Q+U', 0.42)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.39, 'Q+U', 0.45)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.26, 'Q+U', 0.39)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.39, 'Q+U', 0.37)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.38, 'Q+U', 0.41)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.38, 'Q+U', 0.44)\n",
      "('action', 8, 'N', 36, 'P', 0.11, 'W', 23.27, 'Q', 0.65, 'U', 0.02, 'Q+U', 0.67)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 2, 'N', 31, 'P', 0.14, 'W', 22.0, 'Q', 0.71, 'U', 0.03, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.26, 'Q+U', 0.65)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.4, 'Q+U', 0.55)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.37, 'Q+U', 0.53)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.42, 'Q+U', 0.48)\n",
      "('action', 5, 'N', 29, 'P', 0.16, 'W', 21.72, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.16396093)\n",
      "('PREDICTED PROBS: ', array([ 0.15945381,  0.16320889,  0.1637553 ,  0.17429633,  0.18830368,\n",
      "        0.15098198], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000000101001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.2, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.39, 'Q+U', 0.42)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.39, 'Q+U', 0.45)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.26, 'Q+U', 0.4)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.39, 'Q+U', 0.38)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.39, 'Q+U', 0.42)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.39, 'Q+U', 0.38)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.38, 'Q+U', 0.45)\n",
      "('action', 8, 'N', 37, 'P', 0.11, 'W', 23.44, 'Q', 0.63, 'U', 0.02, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 2, 'N', 32, 'P', 0.14, 'W', 22.16, 'Q', 0.69, 'U', 0.03, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.26, 'Q+U', 0.65)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.4, 'Q+U', 0.55)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.38, 'Q+U', 0.53)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.34076038)\n",
      "('PREDICTED PROBS: ', array([ 0.12157354,  0.16703938,  0.12857537,  0.15178515,  0.16225082,\n",
      "        0.14110963,  0.12766612], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001010000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.2, 'Q+U', 0.3)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.4, 'Q+U', 0.43)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.4, 'Q+U', 0.46)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.26, 'Q+U', 0.4)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.39, 'Q+U', 0.38)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.39, 'Q+U', 0.42)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.39, 'Q+U', 0.45)\n",
      "('action', 8, 'N', 38, 'P', 0.11, 'W', 23.78, 'Q', 0.63, 'U', 0.02, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 1, 'P', 0.12, 'W', 0.34, 'Q', 0.34, 'U', 0.38, 'Q+U', 0.72)\n",
      "('action', 2, 'N', 32, 'P', 0.14, 'W', 22.16, 'Q', 0.69, 'U', 0.03, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.27, 'Q+U', 0.66)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.41, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.38, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0,  0, -1,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.063820578)\n",
      "('PREDICTED PROBS: ', array([ 0.14266065,  0.14321518,  0.14783075,  0.15055177,  0.15016669,\n",
      "        0.13430126,  0.13127367], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000100000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.2, 'Q+U', 0.3)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.4, 'Q+U', 0.43)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.4, 'Q+U', 0.46)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.27, 'Q+U', 0.4)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.4, 'Q+U', 0.38)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.4, 'Q+U', 0.43)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.39, 'Q+U', 0.45)\n",
      "('action', 8, 'N', 39, 'P', 0.11, 'W', 23.84, 'Q', 0.61, 'U', 0.02, 'Q+U', 0.63)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 1, 'P', 0.12, 'W', 0.34, 'Q', 0.34, 'U', 0.38, 'Q+U', 0.72)\n",
      "('action', 2, 'N', 32, 'P', 0.14, 'W', 22.16, 'Q', 0.69, 'U', 0.03, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.37, 'Q+U', 0.44)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.27, 'Q+U', 0.66)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.42, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.39, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([-1,  0,  0,  0,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.21599887)\n",
      "('PREDICTED PROBS: ', array([ 0.14825152,  0.14728928,  0.13888496,  0.15089837,  0.13916796,\n",
      "        0.14219952,  0.13330834], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001100000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.2, 'Q+U', 0.3)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.4, 'Q+U', 0.44)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.4, 'Q+U', 0.47)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.27, 'Q+U', 0.4)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.4, 'Q+U', 0.39)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.4, 'Q+U', 0.43)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.39, 'Q+U', 0.46)\n",
      "('action', 8, 'N', 40, 'P', 0.11, 'W', 24.06, 'Q', 0.6, 'U', 0.02, 'Q+U', 0.62)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.37, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 1, 'P', 0.12, 'W', 0.34, 'Q', 0.34, 'U', 0.39, 'Q+U', 0.73)\n",
      "('action', 2, 'N', 32, 'P', 0.14, 'W', 22.16, 'Q', 0.69, 'U', 0.03, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.38, 'Q+U', 0.44)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.27, 'Q+U', 0.66)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.42, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.39, 'Q+U', 0.55)\n",
      "('action', 7, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0,  0,  0,  0,  0,  0,  0, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.0099685611)\n",
      "('PREDICTED PROBS: ', array([ 0.14209978,  0.14370593,  0.13904867,  0.14232688,  0.14559409,\n",
      "        0.14591987,  0.14130481], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000000010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.21, 'Q+U', 0.3)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.41, 'Q+U', 0.44)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.47)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.27, 'Q+U', 0.41)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.41, 'Q+U', 0.39)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.4, 'Q+U', 0.43)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.4, 'Q+U', 0.46)\n",
      "('action', 8, 'N', 41, 'P', 0.11, 'W', 24.07, 'Q', 0.59, 'U', 0.02, 'Q+U', 0.61)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.38, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 1, 'P', 0.12, 'W', 0.34, 'Q', 0.34, 'U', 0.39, 'Q+U', 0.73)\n",
      "('action', 2, 'N', 32, 'P', 0.14, 'W', 22.16, 'Q', 0.69, 'U', 0.03, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.38, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.28, 'Q+U', 0.67)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.43, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.4, 'Q+U', 0.55)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.37, 'Q+U', 0.38)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.12, 'Q+U', 0.12)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('action', 4, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 5, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 6, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.13, 'Q+U', 0.13)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.31954581)\n",
      "('PREDICTED PROBS: ', array([ 0.14586478,  0.1508449 ,  0.18769579,  0.19444412,  0.16554601,\n",
      "        0.15560442], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001010000000', '001000001010000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.21, 'Q+U', 0.3)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.41, 'Q+U', 0.44)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.47)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.27, 'Q+U', 0.41)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.41, 'Q+U', 0.4)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.41, 'Q+U', 0.44)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.41, 'Q+U', 0.4)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.4, 'Q+U', 0.47)\n",
      "('action', 8, 'N', 42, 'P', 0.11, 'W', 24.39, 'Q', 0.58, 'U', 0.02, 'Q+U', 0.6)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.38, 'Q+U', 0.6)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.26, 'Q+U', 0.59)\n",
      "('action', 2, 'N', 32, 'P', 0.14, 'W', 22.16, 'Q', 0.69, 'U', 0.03, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.39, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.28, 'Q+U', 0.67)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.43, 'Q+U', 0.58)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.4, 'Q+U', 0.56)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.38, 'Q+U', 0.39)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 3, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.42, 'Q+U', 0.49)\n",
      "('action', 5, 'N', 29, 'P', 0.16, 'W', 21.72, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 1, 'P', 0.14, 'W', 0.16, 'Q', 0.16, 'U', 0.41, 'Q+U', 0.57)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1,  1,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.36450663)\n",
      "('PREDICTED PROBS: ', array([ 0.14849815,  0.15747039,  0.17534485,  0.20768167,  0.15885684,\n",
      "        0.15214808], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000100001001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.21, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.41, 'Q+U', 0.45)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.48)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.28, 'Q+U', 0.41)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.41, 'Q+U', 0.4)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.41, 'Q+U', 0.44)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.47)\n",
      "('action', 8, 'N', 43, 'P', 0.11, 'W', 24.75, 'Q', 0.58, 'U', 0.02, 'Q+U', 0.59)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.39, 'Q+U', 0.6)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.27, 'Q+U', 0.6)\n",
      "('action', 2, 'N', 33, 'P', 0.14, 'W', 22.53, 'Q', 0.68, 'U', 0.03, 'Q+U', 0.71)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.39, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.28, 'Q+U', 0.67)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.44, 'Q+U', 0.58)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.41, 'Q+U', 0.56)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.38, 'Q+U', 0.39)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 3, 'N', 1, 'P', 0.14, 'W', 0.36, 'Q', 0.36, 'U', 0.4, 'Q+U', 0.77)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.43, 'Q+U', 0.49)\n",
      "('action', 5, 'N', 29, 'P', 0.16, 'W', 21.72, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 1, 'P', 0.14, 'W', 0.16, 'Q', 0.16, 'U', 0.41, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0,  1, -1,  0,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.13504823)\n",
      "('PREDICTED PROBS: ', array([ 0.16581704,  0.16918647,  0.15995897,  0.18642451,  0.16233607,\n",
      "        0.15627699], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '010000001001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.21, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.42, 'Q+U', 0.45)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.48)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.28, 'Q+U', 0.41)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.42, 'Q+U', 0.4)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.41, 'Q+U', 0.44)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.47)\n",
      "('action', 8, 'N', 44, 'P', 0.11, 'W', 24.89, 'Q', 0.57, 'U', 0.02, 'Q+U', 0.58)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.39, 'Q+U', 0.61)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.27, 'Q+U', 0.6)\n",
      "('action', 2, 'N', 34, 'P', 0.14, 'W', 22.66, 'Q', 0.67, 'U', 0.03, 'Q+U', 0.69)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.4, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.29, 'Q+U', 0.68)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.44, 'Q+U', 0.59)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.41, 'Q+U', 0.57)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.39, 'Q+U', 0.4)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 1, 'N', 1, 'P', 0.14, 'W', 0.14, 'Q', 0.14, 'U', 0.41, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 1, 'P', 0.14, 'W', 0.36, 'Q', 0.36, 'U', 0.41, 'Q+U', 0.77)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.44, 'Q+U', 0.5)\n",
      "('action', 5, 'N', 29, 'P', 0.16, 'W', 21.72, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 1, 'P', 0.14, 'W', 0.16, 'Q', 0.16, 'U', 0.42, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([ 1,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.11126245)\n",
      "('PREDICTED PROBS: ', array([ 0.16979167,  0.16520377,  0.1657342 ,  0.17568363,  0.1653972 ,\n",
      "        0.15818959], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '100000001001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.21, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.42, 'Q+U', 0.45)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.48)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.28, 'Q+U', 0.42)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.42, 'Q+U', 0.41)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.42, 'Q+U', 0.45)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.48)\n",
      "('action', 8, 'N', 45, 'P', 0.11, 'W', 25.0, 'Q', 0.56, 'U', 0.02, 'Q+U', 0.57)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.4, 'Q+U', 0.61)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.27, 'Q+U', 0.6)\n",
      "('action', 2, 'N', 35, 'P', 0.14, 'W', 22.78, 'Q', 0.65, 'U', 0.03, 'Q+U', 0.68)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.4, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 2, 'P', 0.13, 'W', 0.78, 'Q', 0.39, 'U', 0.29, 'Q+U', 0.68)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.45, 'Q+U', 0.59)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.42, 'Q+U', 0.57)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.39, 'Q+U', 0.4)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 5, 'N', 1, 'P', 0.17, 'W', 0.4, 'Q', 0.4, 'U', 0.12, 'Q+U', 0.52)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.14, 'Q+U', 0.14)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 2, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.47719052)\n",
      "('PREDICTED PROBS: ', array([ 0.18611667,  0.20344944,  0.20242706,  0.21629705,  0.19170977], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.21, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.43, 'Q+U', 0.46)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.49)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.28, 'Q+U', 0.42)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.42, 'Q+U', 0.41)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.42, 'Q+U', 0.45)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.48)\n",
      "('action', 8, 'N', 46, 'P', 0.11, 'W', 25.47, 'Q', 0.55, 'U', 0.02, 'Q+U', 0.57)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.4, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.28, 'Q+U', 0.61)\n",
      "('action', 2, 'N', 35, 'P', 0.14, 'W', 22.78, 'Q', 0.65, 'U', 0.03, 'Q+U', 0.68)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 3, 'P', 0.13, 'W', 1.26, 'Q', 0.42, 'U', 0.22, 'Q+U', 0.64)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.45, 'Q+U', 0.6)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.42, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.39, 'Q+U', 0.4)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.14, 'W', 0.11, 'Q', 0.11, 'U', 0.41, 'Q+U', 0.52)\n",
      "('action', 1, 'N', 1, 'P', 0.14, 'W', 0.14, 'Q', 0.14, 'U', 0.41, 'Q+U', 0.55)\n",
      "('action', 3, 'N', 1, 'P', 0.14, 'W', 0.36, 'Q', 0.36, 'U', 0.42, 'Q+U', 0.78)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.44, 'Q+U', 0.51)\n",
      "('action', 5, 'N', 29, 'P', 0.16, 'W', 21.72, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 1, 'P', 0.14, 'W', 0.16, 'Q', 0.16, 'U', 0.42, 'Q+U', 0.59)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.078889444)\n",
      "('PREDICTED PROBS: ', array([ 0.16559628,  0.16561472,  0.16493616,  0.16778858,  0.17397338,\n",
      "        0.16209088], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000000011001000000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.22, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.43, 'Q+U', 0.46)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.49)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.29, 'Q+U', 0.42)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.43, 'Q+U', 0.41)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.43, 'Q+U', 0.46)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.48)\n",
      "('action', 8, 'N', 47, 'P', 0.11, 'W', 25.55, 'Q', 0.54, 'U', 0.02, 'Q+U', 0.56)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.4, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.28, 'Q+U', 0.61)\n",
      "('action', 2, 'N', 36, 'P', 0.14, 'W', 22.85, 'Q', 0.63, 'U', 0.03, 'Q+U', 0.66)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.41, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 3, 'P', 0.13, 'W', 1.26, 'Q', 0.42, 'U', 0.22, 'Q+U', 0.64)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.46, 'Q+U', 0.6)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.43, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.4, 'Q+U', 0.41)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.14, 'W', 0.11, 'Q', 0.11, 'U', 0.41, 'Q+U', 0.52)\n",
      "('action', 1, 'N', 1, 'P', 0.14, 'W', 0.14, 'Q', 0.14, 'U', 0.42, 'Q+U', 0.55)\n",
      "('action', 3, 'N', 1, 'P', 0.14, 'W', 0.36, 'Q', 0.36, 'U', 0.42, 'Q+U', 0.79)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.45, 'Q+U', 0.51)\n",
      "('action', 5, 'N', 29, 'P', 0.16, 'W', 21.72, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 1, 'P', 0.14, 'W', 0.16, 'Q', 0.16, 'U', 0.43, 'Q+U', 0.59)\n",
      "('action', 7, 'N', 1, 'P', 0.13, 'W', 0.08, 'Q', 0.08, 'U', 0.4, 'Q+U', 0.48)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1,  1,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 4, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 5, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 6, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 7, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  1,  0, -1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37615418)\n",
      "('PREDICTED PROBS: ', array([ 0.18847628,  0.20080608,  0.22113647,  0.19480786,  0.19477338], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000100001001000000', '000100001001001000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.22, 'Q+U', 0.31)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.43, 'Q+U', 0.47)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.29, 'Q+U', 0.42)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.43, 'Q+U', 0.42)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.43, 'Q+U', 0.46)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.49)\n",
      "('action', 8, 'N', 48, 'P', 0.11, 'W', 25.93, 'Q', 0.54, 'U', 0.02, 'Q+U', 0.56)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.41, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.28, 'Q+U', 0.61)\n",
      "('action', 2, 'N', 37, 'P', 0.14, 'W', 23.23, 'Q', 0.63, 'U', 0.02, 'Q+U', 0.65)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 3, 'P', 0.13, 'W', 1.26, 'Q', 0.42, 'U', 0.22, 'Q+U', 0.64)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.46, 'Q+U', 0.61)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.43, 'Q+U', 0.59)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.4, 'Q+U', 0.41)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.14, 'W', 0.11, 'Q', 0.11, 'U', 0.42, 'Q+U', 0.53)\n",
      "('action', 1, 'N', 1, 'P', 0.14, 'W', 0.14, 'Q', 0.14, 'U', 0.42, 'Q+U', 0.56)\n",
      "('action', 3, 'N', 2, 'P', 0.14, 'W', 0.74, 'Q', 0.37, 'U', 0.28, 'Q+U', 0.66)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.45, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 29, 'P', 0.16, 'W', 21.72, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 1, 'P', 0.14, 'W', 0.16, 'Q', 0.16, 'U', 0.44, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 1, 'P', 0.13, 'W', 0.08, 'Q', 0.08, 'U', 0.4, 'Q+U', 0.48)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 1, 'N', 2, 'P', 0.17, 'W', 0.83, 'Q', 0.42, 'U', 0.3, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('action', 4, 'N', 1, 'P', 0.21, 'W', 0.48, 'Q', 0.48, 'U', 0.57, 'Q+U', 1.05)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.22, 'Q', 0.22, 'U', 0.46, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 6, 'N', 0, 'P', 0.22, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.19, 'Q+U', 0.19)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.44740447)\n",
      "('PREDICTED PROBS: ', array([ 0.23970442,  0.2614927 ,  0.26077428,  0.23802863], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001010000', '000001101001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.22, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.44, 'Q+U', 0.47)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.29, 'Q+U', 0.43)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.43, 'Q+U', 0.42)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.43, 'Q+U', 0.46)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.49)\n",
      "('action', 8, 'N', 49, 'P', 0.11, 'W', 26.38, 'Q', 0.54, 'U', 0.02, 'Q+U', 0.56)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.41, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.28, 'Q+U', 0.61)\n",
      "('action', 2, 'N', 38, 'P', 0.14, 'W', 23.68, 'Q', 0.62, 'U', 0.02, 'Q+U', 0.65)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 3, 'P', 0.13, 'W', 1.26, 'Q', 0.42, 'U', 0.23, 'Q+U', 0.65)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.47, 'Q+U', 0.61)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.44, 'Q+U', 0.59)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.41, 'Q+U', 0.42)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.14, 'W', 0.11, 'Q', 0.11, 'U', 0.42, 'Q+U', 0.54)\n",
      "('action', 1, 'N', 1, 'P', 0.14, 'W', 0.14, 'Q', 0.14, 'U', 0.43, 'Q+U', 0.56)\n",
      "('action', 3, 'N', 2, 'P', 0.14, 'W', 0.74, 'Q', 0.37, 'U', 0.29, 'Q+U', 0.66)\n",
      "('action', 4, 'N', 1, 'P', 0.15, 'W', 0.07, 'Q', 0.07, 'U', 0.46, 'Q+U', 0.53)\n",
      "('action', 5, 'N', 30, 'P', 0.16, 'W', 22.17, 'Q', 0.74, 'U', 0.03, 'Q+U', 0.77)\n",
      "('action', 6, 'N', 1, 'P', 0.14, 'W', 0.16, 'Q', 0.16, 'U', 0.44, 'Q+U', 0.61)\n",
      "('action', 7, 'N', 1, 'P', 0.13, 'W', 0.08, 'Q', 0.08, 'U', 0.41, 'Q+U', 0.49)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0, -1,  0,  0,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('action', 1, 'N', 2, 'P', 0.17, 'W', 0.83, 'Q', 0.42, 'U', 0.3, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 4, 'N', 2, 'P', 0.21, 'W', 0.92, 'Q', 0.46, 'U', 0.39, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.22, 'Q', 0.22, 'U', 0.47, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.87, 'Q+U', 0.87)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1, -1,  0,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.20552064)\n",
      "('PREDICTED PROBS: ', array([ 0.20057803,  0.20039889,  0.2108579 ,  0.19809008,  0.19007507], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001001000000', '000001001001000000', '000001001001100000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.22, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.44, 'Q+U', 0.47)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.29, 'Q+U', 0.43)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.44, 'Q+U', 0.43)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.44, 'Q+U', 0.47)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.49)\n",
      "('action', 8, 'N', 50, 'P', 0.11, 'W', 26.58, 'Q', 0.53, 'U', 0.02, 'Q+U', 0.55)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.42, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.29, 'Q+U', 0.62)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.02, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.42, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 3, 'P', 0.13, 'W', 1.26, 'Q', 0.42, 'U', 0.23, 'Q+U', 0.65)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.47, 'Q+U', 0.62)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.44, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.41, 'Q+U', 0.42)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 5, 'N', 2, 'P', 0.17, 'W', 0.88, 'Q', 0.44, 'U', 0.1, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.23, 'Q+U', 0.23)\n",
      "('action', 2, 'N', 2, 'P', 0.21, 'W', 0.92, 'Q', 0.46, 'U', 0.1, 'Q+U', 0.56)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.22, 'Q+U', 0.22)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 6, 'N', 1, 'P', 0.22, 'W', 0.45, 'Q', 0.45, 'U', 0.15, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 1, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.66516483)\n",
      "('PREDICTED PROBS: ', array([ 0.30037597,  0.35440996,  0.34521404], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.22, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.44, 'Q+U', 0.48)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.51)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.3, 'Q+U', 0.43)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.44, 'Q+U', 0.43)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.44, 'Q+U', 0.47)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.5)\n",
      "('action', 8, 'N', 51, 'P', 0.11, 'W', 27.25, 'Q', 0.53, 'U', 0.02, 'Q+U', 0.55)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.42, 'Q+U', 0.64)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.29, 'Q+U', 0.62)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.02, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 4, 'P', 0.13, 'W', 1.92, 'Q', 0.48, 'U', 0.18, 'Q+U', 0.67)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.48, 'Q+U', 0.62)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.44, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.42, 'Q+U', 0.43)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('action', 5, 'N', 3, 'P', 0.17, 'W', 1.54, 'Q', 0.51, 'U', 0.08, 'Q+U', 0.6)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 2, 'N', 3, 'P', 0.21, 'W', 1.59, 'Q', 0.53, 'U', 0.09, 'Q+U', 0.62)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.27, 'Q+U', 0.27)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 6, 'N', 2, 'P', 0.22, 'W', 1.11, 'Q', 0.56, 'U', 0.12, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.26, 'W', 0.67, 'Q', 0.67, 'U', 0.18, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 3, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.89519179)\n",
      "('PREDICTED PROBS: ', array([ 0.42611924,  0.57388079], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.23, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.45, 'Q+U', 0.48)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.51)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.3, 'Q+U', 0.43)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.45, 'Q+U', 0.43)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.44, 'Q+U', 0.47)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.5)\n",
      "('action', 8, 'N', 52, 'P', 0.11, 'W', 28.14, 'Q', 0.54, 'U', 0.02, 'Q+U', 0.56)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.43, 'Q+U', 0.64)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.29, 'Q+U', 0.62)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.02, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.43, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 5, 'P', 0.13, 'W', 2.82, 'Q', 0.56, 'U', 0.16, 'Q+U', 0.72)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.48, 'Q+U', 0.63)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.45, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.42, 'Q+U', 0.43)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 5, 'N', 4, 'P', 0.17, 'W', 2.44, 'Q', 0.61, 'U', 0.07, 'Q+U', 0.68)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 2, 'N', 4, 'P', 0.21, 'W', 2.48, 'Q', 0.62, 'U', 0.09, 'Q+U', 0.71)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 6, 'N', 3, 'P', 0.22, 'W', 2.01, 'Q', 0.67, 'U', 0.11, 'Q+U', 0.78)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 1, 'N', 2, 'P', 0.26, 'W', 1.56, 'Q', 0.78, 'U', 0.15, 'Q+U', 0.93)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 3, 'N', 1, 'P', 0.35, 'W', 0.9, 'Q', 0.9, 'U', 0.25, 'Q+U', 1.15)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 7, 'N', 0, 'P', 0.57, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.23, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.45, 'Q+U', 0.48)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.51)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.3, 'Q+U', 0.44)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.45, 'Q+U', 0.44)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.45, 'Q+U', 0.48)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.5)\n",
      "('action', 8, 'N', 53, 'P', 0.11, 'W', 29.14, 'Q', 0.55, 'U', 0.02, 'Q+U', 0.57)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.43, 'Q+U', 0.65)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.3, 'Q+U', 0.63)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.02, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 6, 'P', 0.13, 'W', 3.82, 'Q', 0.64, 'U', 0.13, 'Q+U', 0.77)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.48, 'Q+U', 0.63)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.45, 'Q+U', 0.61)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.42, 'Q+U', 0.43)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 5, 'N', 5, 'P', 0.17, 'W', 3.44, 'Q', 0.69, 'U', 0.07, 'Q+U', 0.76)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.31, 'Q+U', 0.31)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 2, 'N', 5, 'P', 0.21, 'W', 3.48, 'Q', 0.7, 'U', 0.08, 'Q+U', 0.78)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 6, 'N', 4, 'P', 0.22, 'W', 3.01, 'Q', 0.75, 'U', 0.1, 'Q+U', 0.85)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 1, 'N', 3, 'P', 0.26, 'W', 2.56, 'Q', 0.85, 'U', 0.13, 'Q+U', 0.98)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 3, 'N', 2, 'P', 0.35, 'W', 1.9, 'Q', 0.95, 'U', 0.2, 'Q+U', 1.15)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 1, 'P', 0.57, 'W', 1.0, 'Q', 1.0, 'U', 0.41, 'Q+U', 1.41)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.23, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.45, 'Q+U', 0.49)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.52)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.3, 'Q+U', 0.44)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.45, 'Q+U', 0.44)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.45, 'Q+U', 0.48)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.51)\n",
      "('action', 8, 'N', 54, 'P', 0.11, 'W', 30.14, 'Q', 0.56, 'U', 0.02, 'Q+U', 0.57)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.43, 'Q+U', 0.65)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.3, 'Q+U', 0.63)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 7, 'P', 0.13, 'W', 4.82, 'Q', 0.69, 'U', 0.12, 'Q+U', 0.81)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.49, 'Q+U', 0.63)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.46, 'Q+U', 0.61)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.43, 'Q+U', 0.44)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 5, 'N', 6, 'P', 0.17, 'W', 4.44, 'Q', 0.74, 'U', 0.06, 'Q+U', 0.8)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 2, 'N', 6, 'P', 0.21, 'W', 4.48, 'Q', 0.75, 'U', 0.07, 'Q+U', 0.82)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 6, 'N', 5, 'P', 0.22, 'W', 4.01, 'Q', 0.8, 'U', 0.09, 'Q+U', 0.89)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 1, 'N', 4, 'P', 0.26, 'W', 3.56, 'Q', 0.89, 'U', 0.12, 'Q+U', 1.01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 3, 'P', 0.35, 'W', 2.9, 'Q', 0.97, 'U', 0.18, 'Q+U', 1.14)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 2, 'P', 0.57, 'W', 2.0, 'Q', 1.0, 'U', 0.33, 'Q+U', 1.33)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.23, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.46, 'Q+U', 0.49)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.52)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.3, 'Q+U', 0.44)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.46, 'Q+U', 0.44)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.45, 'Q+U', 0.48)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.51)\n",
      "('action', 8, 'N', 55, 'P', 0.11, 'W', 31.14, 'Q', 0.57, 'U', 0.02, 'Q+U', 0.58)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.44, 'Q+U', 0.65)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.3, 'Q+U', 0.63)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.44, 'Q+U', 0.51)\n",
      "('action', 4, 'N', 8, 'P', 0.13, 'W', 5.82, 'Q', 0.73, 'U', 0.11, 'Q+U', 0.83)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.49, 'Q+U', 0.64)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.46, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.43, 'Q+U', 0.44)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 5, 'N', 7, 'P', 0.17, 'W', 5.44, 'Q', 0.78, 'U', 0.06, 'Q+U', 0.84)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 2, 'N', 7, 'P', 0.21, 'W', 5.48, 'Q', 0.78, 'U', 0.07, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 6, 'P', 0.22, 'W', 5.01, 'Q', 0.83, 'U', 0.08, 'Q+U', 0.92)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 5, 'P', 0.26, 'W', 4.56, 'Q', 0.91, 'U', 0.11, 'Q+U', 1.02)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 3, 'N', 4, 'P', 0.35, 'W', 3.9, 'Q', 0.97, 'U', 0.16, 'Q+U', 1.13)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 7, 'N', 3, 'P', 0.57, 'W', 3.0, 'Q', 1.0, 'U', 0.29, 'Q+U', 1.29)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.23, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.46, 'Q+U', 0.49)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.52)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.31, 'Q+U', 0.44)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.46, 'Q+U', 0.45)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.46, 'Q+U', 0.49)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.51)\n",
      "('action', 8, 'N', 56, 'P', 0.11, 'W', 32.14, 'Q', 0.57, 'U', 0.02, 'Q+U', 0.59)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.44, 'Q+U', 0.66)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.3, 'Q+U', 0.63)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.51)\n",
      "('action', 4, 'N', 9, 'P', 0.13, 'W', 6.82, 'Q', 0.76, 'U', 0.1, 'Q+U', 0.85)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.5, 'Q+U', 0.64)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.47, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.43, 'Q+U', 0.44)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('action', 5, 'N', 8, 'P', 0.17, 'W', 6.44, 'Q', 0.8, 'U', 0.06, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 2, 'N', 8, 'P', 0.21, 'W', 6.48, 'Q', 0.81, 'U', 0.07, 'Q+U', 0.88)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 7, 'P', 0.22, 'W', 6.01, 'Q', 0.86, 'U', 0.08, 'Q+U', 0.93)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 6, 'P', 0.26, 'W', 5.56, 'Q', 0.93, 'U', 0.1, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 5, 'P', 0.35, 'W', 4.9, 'Q', 0.98, 'U', 0.14, 'Q+U', 1.12)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 7, 'N', 4, 'P', 0.57, 'W', 4.0, 'Q', 1.0, 'U', 0.26, 'Q+U', 1.26)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.23, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.46, 'Q+U', 0.5)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.53)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.31, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.46, 'Q+U', 0.45)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.46, 'Q+U', 0.49)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.52)\n",
      "('action', 8, 'N', 57, 'P', 0.11, 'W', 33.14, 'Q', 0.58, 'U', 0.02, 'Q+U', 0.6)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.45, 'Q+U', 0.66)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.31, 'Q+U', 0.64)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.45, 'Q+U', 0.52)\n",
      "('action', 4, 'N', 10, 'P', 0.13, 'W', 7.82, 'Q', 0.78, 'U', 0.09, 'Q+U', 0.87)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.5, 'Q+U', 0.65)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.47, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.44, 'Q+U', 0.45)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 5, 'N', 9, 'P', 0.17, 'W', 7.44, 'Q', 0.83, 'U', 0.05, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.4, 'Q+U', 0.4)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 2, 'N', 9, 'P', 0.21, 'W', 7.48, 'Q', 0.83, 'U', 0.06, 'Q+U', 0.9)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 6, 'N', 8, 'P', 0.22, 'W', 7.01, 'Q', 0.88, 'U', 0.07, 'Q+U', 0.95)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 7, 'P', 0.26, 'W', 6.56, 'Q', 0.94, 'U', 0.09, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 3, 'N', 6, 'P', 0.35, 'W', 5.9, 'Q', 0.98, 'U', 0.13, 'Q+U', 1.12)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 7, 'N', 5, 'P', 0.57, 'W', 5.0, 'Q', 1.0, 'U', 0.23, 'Q+U', 1.23)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.24, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.47, 'Q+U', 0.5)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.53)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.31, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.47, 'Q+U', 0.45)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.46, 'Q+U', 0.49)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.52)\n",
      "('action', 8, 'N', 58, 'P', 0.11, 'W', 34.14, 'Q', 0.59, 'U', 0.02, 'Q+U', 0.6)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.45, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.31, 'Q+U', 0.64)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.52)\n",
      "('action', 4, 'N', 11, 'P', 0.13, 'W', 8.82, 'Q', 0.8, 'U', 0.08, 'Q+U', 0.88)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.51, 'Q+U', 0.65)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.47, 'Q+U', 0.63)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.44, 'Q+U', 0.45)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('action', 5, 'N', 10, 'P', 0.17, 'W', 8.44, 'Q', 0.84, 'U', 0.05, 'Q+U', 0.89)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 2, 'N', 10, 'P', 0.21, 'W', 8.48, 'Q', 0.85, 'U', 0.06, 'Q+U', 0.91)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 6, 'N', 9, 'P', 0.22, 'W', 8.01, 'Q', 0.89, 'U', 0.07, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 8, 'P', 0.26, 'W', 7.56, 'Q', 0.95, 'U', 0.09, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 7, 'P', 0.35, 'W', 6.9, 'Q', 0.99, 'U', 0.13, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.98, 'Q+U', 0.98)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 1.13, 'Q+U', 1.13)\n",
      "('action', 7, 'N', 6, 'P', 0.57, 'W', 6.0, 'Q', 1.0, 'U', 0.22, 'Q+U', 1.22)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101011010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.24, 'Q+U', 0.33)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.47, 'Q+U', 0.5)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.53)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.31, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.47, 'Q+U', 0.46)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.47, 'Q+U', 0.5)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.52)\n",
      "('action', 8, 'N', 59, 'P', 0.11, 'W', 35.14, 'Q', 0.6, 'U', 0.02, 'Q+U', 0.61)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.45, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.31, 'Q+U', 0.64)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.52)\n",
      "('action', 4, 'N', 12, 'P', 0.13, 'W', 9.82, 'Q', 0.82, 'U', 0.08, 'Q+U', 0.89)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.51, 'Q+U', 0.66)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.48, 'Q+U', 0.63)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.45, 'Q+U', 0.46)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.43, 'Q+U', 0.43)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 5, 'N', 11, 'P', 0.17, 'W', 9.44, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.44, 'Q+U', 0.44)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 2, 'N', 11, 'P', 0.21, 'W', 9.48, 'Q', 0.86, 'U', 0.06, 'Q+U', 0.92)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 6, 'N', 10, 'P', 0.22, 'W', 9.01, 'Q', 0.9, 'U', 0.07, 'Q+U', 0.97)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 1, 'N', 9, 'P', 0.26, 'W', 8.56, 'Q', 0.95, 'U', 0.08, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 3, 'N', 8, 'P', 0.35, 'W', 7.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.43, 'W', 0.0, 'Q', 0.0, 'U', 1.21, 'Q+U', 1.21)\n",
      "('action', 7, 'N', 7, 'P', 0.57, 'W', 7.0, 'Q', 1.0, 'U', 0.2, 'Q+U', 1.2)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([-1, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101111010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.24, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.47, 'Q+U', 0.51)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.32, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.47, 'Q+U', 0.46)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.47, 'Q+U', 0.5)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.53)\n",
      "('action', 8, 'N', 60, 'P', 0.11, 'W', 36.14, 'Q', 0.6, 'U', 0.02, 'Q+U', 0.62)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.46, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.31, 'Q+U', 0.64)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.46, 'Q+U', 0.53)\n",
      "('action', 4, 'N', 13, 'P', 0.13, 'W', 10.82, 'Q', 0.83, 'U', 0.07, 'Q+U', 0.9)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.52, 'Q+U', 0.66)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.48, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.45, 'Q+U', 0.46)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 5, 'N', 12, 'P', 0.17, 'W', 10.44, 'Q', 0.87, 'U', 0.05, 'Q+U', 0.92)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 2, 'N', 12, 'P', 0.21, 'W', 10.48, 'Q', 0.87, 'U', 0.06, 'Q+U', 0.93)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 6, 'N', 11, 'P', 0.22, 'W', 10.01, 'Q', 0.91, 'U', 0.06, 'Q+U', 0.97)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 1, 'N', 10, 'P', 0.26, 'W', 9.56, 'Q', 0.96, 'U', 0.08, 'Q+U', 1.03)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 3, 'N', 9, 'P', 0.35, 'W', 8.9, 'Q', 0.99, 'U', 0.11, 'Q+U', 1.1)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 1.09, 'Q+U', 1.09)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.43, 'W', 1.0, 'Q', 1.0, 'U', 0.64, 'Q+U', 1.64)\n",
      "('action', 7, 'N', 7, 'P', 0.57, 'W', 7.0, 'Q', 1.0, 'U', 0.22, 'Q+U', 1.22)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([-1, -1, -1,  1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000101101011010000', '000101101111010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.24, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.48, 'Q+U', 0.51)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.32, 'Q+U', 0.45)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.48, 'Q+U', 0.46)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.47, 'Q+U', 0.5)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.53)\n",
      "('action', 8, 'N', 61, 'P', 0.11, 'W', 37.14, 'Q', 0.61, 'U', 0.02, 'Q+U', 0.62)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.46, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.32, 'Q+U', 0.65)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.53)\n",
      "('action', 4, 'N', 14, 'P', 0.13, 'W', 11.82, 'Q', 0.84, 'U', 0.07, 'Q+U', 0.91)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.52, 'Q+U', 0.67)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.49, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.45, 'Q+U', 0.46)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 5, 'N', 13, 'P', 0.17, 'W', 11.44, 'Q', 0.88, 'U', 0.04, 'Q+U', 0.92)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 2, 'N', 13, 'P', 0.21, 'W', 11.48, 'Q', 0.88, 'U', 0.05, 'Q+U', 0.94)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 6, 'N', 12, 'P', 0.22, 'W', 11.01, 'Q', 0.92, 'U', 0.06, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 1, 'N', 11, 'P', 0.26, 'W', 10.56, 'Q', 0.96, 'U', 0.08, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.0, 'Q+U', 1.0)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.11, 'Q+U', 1.1)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 1.14, 'Q+U', 1.14)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.24, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.48, 'Q+U', 0.51)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.54)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.32, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.48, 'Q+U', 0.47)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.48, 'Q+U', 0.51)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.47, 'Q+U', 0.47)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.53)\n",
      "('action', 8, 'N', 62, 'P', 0.11, 'W', 38.14, 'Q', 0.62, 'U', 0.02, 'Q+U', 0.63)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.46, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.32, 'Q+U', 0.65)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.54)\n",
      "('action', 4, 'N', 15, 'P', 0.13, 'W', 12.82, 'Q', 0.85, 'U', 0.06, 'Q+U', 0.92)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.52, 'Q+U', 0.67)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.49, 'Q+U', 0.65)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.46, 'Q+U', 0.47)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 14, 'P', 0.17, 'W', 12.44, 'Q', 0.89, 'U', 0.04, 'Q+U', 0.93)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 2, 'N', 14, 'P', 0.21, 'W', 12.48, 'Q', 0.89, 'U', 0.05, 'Q+U', 0.94)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 6, 'N', 13, 'P', 0.22, 'W', 12.01, 'Q', 0.92, 'U', 0.06, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 1, 'N', 12, 'P', 0.26, 'W', 11.56, 'Q', 0.96, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.94, 'Q+U', 0.94)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.11, 'Q+U', 1.1)\n",
      "('action', 7, 'N', 1, 'P', 0.35, 'W', 1.0, 'Q', 1.0, 'U', 0.6, 'Q+U', 1.6)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.24, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.48, 'Q+U', 0.52)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.55)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.32, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.48, 'Q+U', 0.47)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.48, 'Q+U', 0.51)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.47, 'Q+U', 0.54)\n",
      "('action', 8, 'N', 63, 'P', 0.11, 'W', 39.14, 'Q', 0.62, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.47, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.32, 'Q+U', 0.65)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.54)\n",
      "('action', 4, 'N', 16, 'P', 0.13, 'W', 13.82, 'Q', 0.86, 'U', 0.06, 'Q+U', 0.92)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.53, 'Q+U', 0.67)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.49, 'Q+U', 0.65)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.46, 'Q+U', 0.47)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 5, 'N', 15, 'P', 0.17, 'W', 13.44, 'Q', 0.9, 'U', 0.04, 'Q+U', 0.94)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 2, 'N', 15, 'P', 0.21, 'W', 13.48, 'Q', 0.9, 'U', 0.05, 'Q+U', 0.95)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 14, 'P', 0.22, 'W', 13.01, 'Q', 0.93, 'U', 0.06, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 1, 'N', 13, 'P', 0.26, 'W', 12.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.98, 'Q+U', 0.98)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.89, 'Q+U', 0.89)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.08, 'Q+U', 1.08)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 2, 'P', 0.35, 'W', 2.0, 'Q', 1.0, 'U', 0.41, 'Q+U', 1.41)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.25, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.49, 'Q+U', 0.52)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.55)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.32, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.49, 'Q+U', 0.47)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.48, 'Q+U', 0.51)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.54)\n",
      "('action', 8, 'N', 64, 'P', 0.11, 'W', 40.14, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.47, 'Q+U', 0.69)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.32, 'Q+U', 0.65)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.54)\n",
      "('action', 4, 'N', 17, 'P', 0.13, 'W', 14.82, 'Q', 0.87, 'U', 0.06, 'Q+U', 0.93)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.53, 'Q+U', 0.68)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.5, 'Q+U', 0.65)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.46, 'Q+U', 0.47)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 5, 'N', 16, 'P', 0.17, 'W', 14.44, 'Q', 0.9, 'U', 0.04, 'Q+U', 0.94)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 2, 'N', 16, 'P', 0.21, 'W', 14.48, 'Q', 0.91, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 6, 'N', 15, 'P', 0.22, 'W', 14.01, 'Q', 0.93, 'U', 0.05, 'Q+U', 0.99)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.93, 'Q+U', 0.93)\n",
      "('action', 1, 'N', 14, 'P', 0.26, 'W', 13.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 1.01, 'Q+U', 1.01)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.92, 'Q+U', 0.92)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.12, 'Q+U', 1.12)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 3, 'P', 0.35, 'W', 3.0, 'Q', 1.0, 'U', 0.32, 'Q+U', 1.32)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.25, 'Q+U', 0.34)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.49, 'Q+U', 0.52)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.55)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.33, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.49, 'Q+U', 0.48)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.49, 'Q+U', 0.52)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.54)\n",
      "('action', 8, 'N', 65, 'P', 0.11, 'W', 41.14, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.48, 'Q+U', 0.69)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.33, 'Q+U', 0.66)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.55)\n",
      "('action', 4, 'N', 18, 'P', 0.13, 'W', 15.82, 'Q', 0.88, 'U', 0.05, 'Q+U', 0.93)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.54, 'Q+U', 0.68)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.5, 'Q+U', 0.66)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.47, 'Q+U', 0.48)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 5, 'N', 17, 'P', 0.17, 'W', 15.44, 'Q', 0.91, 'U', 0.04, 'Q+U', 0.95)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 2, 'N', 17, 'P', 0.21, 'W', 15.48, 'Q', 0.91, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 16, 'P', 0.22, 'W', 15.01, 'Q', 0.94, 'U', 0.05, 'Q+U', 0.99)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.96, 'Q+U', 0.96)\n",
      "('action', 1, 'N', 15, 'P', 0.26, 'W', 14.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1, -1, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37307718)\n",
      "('PREDICTED PROBS: ', array([ 0.3413097 ,  0.34924269,  0.30944762], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101001110000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.25, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.49, 'Q+U', 0.53)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.56)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.33, 'Q+U', 0.46)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.49, 'Q+U', 0.48)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.49, 'Q+U', 0.52)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.48, 'Q+U', 0.55)\n",
      "('action', 8, 'N', 66, 'P', 0.11, 'W', 41.52, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.48, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.33, 'Q+U', 0.66)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.55)\n",
      "('action', 4, 'N', 19, 'P', 0.13, 'W', 16.19, 'Q', 0.85, 'U', 0.05, 'Q+U', 0.9)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.54, 'Q+U', 0.69)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.51, 'Q+U', 0.66)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.47, 'Q+U', 0.48)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.59, 'Q+U', 0.59)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 5, 'N', 18, 'P', 0.17, 'W', 15.81, 'Q', 0.88, 'U', 0.04, 'Q+U', 0.92)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 2, 'N', 18, 'P', 0.21, 'W', 15.86, 'Q', 0.88, 'U', 0.05, 'Q+U', 0.93)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 17, 'P', 0.22, 'W', 15.38, 'Q', 0.9, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.99, 'Q+U', 0.99)\n",
      "('action', 1, 'N', 15, 'P', 0.26, 'W', 14.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.54, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 0.98, 'Q+U', 0.98)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.16, 'Q+U', 1.16)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.12, 'Q+U', 1.11)\n",
      "('action', 7, 'N', 4, 'P', 0.35, 'W', 4.0, 'Q', 1.0, 'U', 0.27, 'Q+U', 1.27)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.25, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.5, 'Q+U', 0.53)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.56)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.33, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.49, 'Q+U', 0.48)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.49, 'Q+U', 0.52)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.55)\n",
      "('action', 8, 'N', 67, 'P', 0.11, 'W', 42.52, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.48, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.33, 'Q+U', 0.66)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.55)\n",
      "('action', 4, 'N', 20, 'P', 0.13, 'W', 17.19, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.54, 'Q+U', 0.69)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.51, 'Q+U', 0.66)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.48, 'Q+U', 0.49)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.56, 'Q+U', 0.56)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 5, 'N', 19, 'P', 0.17, 'W', 16.81, 'Q', 0.88, 'U', 0.04, 'Q+U', 0.92)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 2, 'N', 19, 'P', 0.21, 'W', 16.86, 'Q', 0.89, 'U', 0.05, 'Q+U', 0.93)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.89, 'Q+U', 0.89)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 18, 'P', 0.22, 'W', 16.38, 'Q', 0.91, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.02, 'Q+U', 1.02)\n",
      "('action', 1, 'N', 16, 'P', 0.26, 'W', 15.56, 'Q', 0.97, 'U', 0.07, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.55, 'Q+U', 0.93)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.01, 'Q+U', 1.01)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.3, 'W', 0.0, 'Q', 0.0, 'U', 1.2, 'Q+U', 1.2)\n",
      "('action', 3, 'N', 10, 'P', 0.35, 'W', 9.9, 'Q', 0.99, 'U', 0.13, 'Q+U', 1.12)\n",
      "('action', 7, 'N', 5, 'P', 0.35, 'W', 5.0, 'Q', 1.0, 'U', 0.23, 'Q+U', 1.23)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0, -1, -1,  0, -1,  1,  1,  1,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101011010000', '000001111011010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.25, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.5, 'Q+U', 0.53)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.56)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.33, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.5, 'Q+U', 0.48)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.5, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.55)\n",
      "('action', 8, 'N', 68, 'P', 0.11, 'W', 43.52, 'Q', 0.64, 'U', 0.01, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.49, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.33, 'Q+U', 0.66)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.56)\n",
      "('action', 4, 'N', 21, 'P', 0.13, 'W', 18.19, 'Q', 0.87, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.55, 'Q+U', 0.69)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.51, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.48, 'Q+U', 0.49)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.57, 'Q+U', 0.57)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 5, 'N', 20, 'P', 0.17, 'W', 17.81, 'Q', 0.89, 'U', 0.04, 'Q+U', 0.93)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 2, 'N', 20, 'P', 0.21, 'W', 17.86, 'Q', 0.89, 'U', 0.05, 'Q+U', 0.94)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('action', 6, 'N', 19, 'P', 0.22, 'W', 17.38, 'Q', 0.91, 'U', 0.05, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('action', 1, 'N', 17, 'P', 0.26, 'W', 16.56, 'Q', 0.97, 'U', 0.06, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.57, 'Q+U', 0.94)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.04, 'Q+U', 1.04)\n",
      "('chosen action...', 0)\n",
      "('moving to...', array([-1,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37860429)\n",
      "('PREDICTED PROBS: ', array([ 0.34713188,  0.34446198,  0.3084062 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101101010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.25, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.5, 'Q+U', 0.54)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.57)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.33, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.5, 'Q+U', 0.49)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.5, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.5, 'Q+U', 0.49)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.56)\n",
      "('action', 8, 'N', 69, 'P', 0.11, 'W', 43.89, 'Q', 0.64, 'U', 0.01, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.49, 'Q+U', 0.71)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.34, 'Q+U', 0.67)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.56)\n",
      "('action', 4, 'N', 22, 'P', 0.13, 'W', 18.57, 'Q', 0.84, 'U', 0.05, 'Q+U', 0.89)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.55, 'Q+U', 0.7)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.52, 'Q+U', 0.67)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.48, 'Q+U', 0.49)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 5, 'N', 21, 'P', 0.17, 'W', 18.19, 'Q', 0.87, 'U', 0.04, 'Q+U', 0.9)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 2, 'N', 21, 'P', 0.21, 'W', 18.24, 'Q', 0.87, 'U', 0.04, 'Q+U', 0.91)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.93, 'Q+U', 0.93)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.93, 'Q+U', 0.93)\n",
      "('action', 6, 'N', 20, 'P', 0.22, 'W', 17.76, 'Q', 0.89, 'U', 0.05, 'Q+U', 0.94)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1,  0,  1]))\n",
      "('action', 0, 'N', 1, 'P', 0.24, 'W', 0.38, 'Q', 0.38, 'U', 0.54, 'Q+U', 0.91)\n",
      "('action', 1, 'N', 17, 'P', 0.26, 'W', 16.56, 'Q', 0.97, 'U', 0.06, 'Q+U', 1.04)\n",
      "('action', 3, 'N', 1, 'P', 0.26, 'W', 0.37, 'Q', 0.37, 'U', 0.58, 'Q+U', 0.96)\n",
      "('action', 7, 'N', 0, 'P', 0.24, 'W', 0.0, 'Q', 0.0, 'U', 1.06, 'Q+U', 1.06)\n",
      "('chosen action...', 7)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  1, -1,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.32278523)\n",
      "('PREDICTED PROBS: ', array([ 0.3193782 ,  0.33569166,  0.3449302 ], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000001101001010000', '000001101001010010'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.26, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.51, 'Q+U', 0.54)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.57)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.34, 'Q+U', 0.47)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.5, 'Q+U', 0.49)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.5, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.49, 'Q+U', 0.56)\n",
      "('action', 8, 'N', 70, 'P', 0.11, 'W', 44.22, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.49, 'Q+U', 0.71)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.34, 'Q+U', 0.67)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.57)\n",
      "('action', 4, 'N', 23, 'P', 0.13, 'W', 18.89, 'Q', 0.82, 'U', 0.05, 'Q+U', 0.87)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.56, 'Q+U', 0.7)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.52, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.49, 'Q+U', 0.5)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.6, 'Q+U', 0.6)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.64, 'Q+U', 0.64)\n",
      "('action', 5, 'N', 22, 'P', 0.17, 'W', 18.51, 'Q', 0.84, 'U', 0.03, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 2, 'N', 22, 'P', 0.21, 'W', 18.56, 'Q', 0.84, 'U', 0.04, 'Q+U', 0.89)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.87, 'Q+U', 0.87)\n",
      "('action', 1, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.95, 'Q+U', 0.95)\n",
      "('action', 6, 'N', 21, 'P', 0.22, 'W', 18.08, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0,  1, -1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.45065233)\n",
      "('PREDICTED PROBS: ', array([ 0.24604604,  0.25497061,  0.25636229,  0.24262106], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '010001001001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.26, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.51, 'Q+U', 0.54)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.57)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.34, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.51, 'Q+U', 0.49)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.5, 'Q+U', 0.53)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.56)\n",
      "('action', 8, 'N', 71, 'P', 0.11, 'W', 44.67, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.5, 'Q+U', 0.71)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.34, 'Q+U', 0.67)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.57)\n",
      "('action', 4, 'N', 24, 'P', 0.13, 'W', 19.34, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.56, 'Q+U', 0.71)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.52, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.49, 'Q+U', 0.5)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.61, 'Q+U', 0.61)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 5, 'N', 23, 'P', 0.17, 'W', 18.96, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 2, 'N', 23, 'P', 0.21, 'W', 19.01, 'Q', 0.83, 'U', 0.04, 'Q+U', 0.87)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.89, 'Q+U', 0.89)\n",
      "('action', 1, 'N', 1, 'P', 0.2, 'W', 0.45, 'Q', 0.45, 'U', 0.49, 'Q+U', 0.94)\n",
      "('action', 3, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.97, 'Q+U', 0.97)\n",
      "('action', 6, 'N', 21, 'P', 0.22, 'W', 18.08, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.92, 'Q+U', 0.92)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.71427965)\n",
      "('PREDICTED PROBS: ', array([ 0.21108095,  0.26273152,  0.27584106,  0.25034645], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000101001001010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.26, 'Q+U', 0.35)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.51, 'Q+U', 0.55)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.57)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.34, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.51, 'Q+U', 0.5)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.51, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.51, 'Q+U', 0.5)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.57)\n",
      "('action', 8, 'N', 72, 'P', 0.11, 'W', 45.38, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.5, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.34, 'Q+U', 0.67)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.57)\n",
      "('action', 4, 'N', 25, 'P', 0.13, 'W', 20.06, 'Q', 0.8, 'U', 0.04, 'Q+U', 0.84)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.56, 'Q+U', 0.71)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.53, 'Q+U', 0.68)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.49, 'Q+U', 0.5)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 5, 'N', 24, 'P', 0.17, 'W', 19.68, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 2, 'N', 24, 'P', 0.21, 'W', 19.72, 'Q', 0.82, 'U', 0.04, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('action', 1, 'N', 1, 'P', 0.2, 'W', 0.45, 'Q', 0.45, 'U', 0.5, 'Q+U', 0.95)\n",
      "('action', 3, 'N', 1, 'P', 0.2, 'W', 0.71, 'Q', 0.71, 'U', 0.5, 'Q+U', 1.21)\n",
      "('action', 6, 'N', 21, 'P', 0.22, 'W', 18.08, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.94, 'Q+U', 0.94)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 1, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 6, 'N', 0, 'P', 0.28, 'W', 0.0, 'Q', 0.0, 'U', 0.28, 'Q+U', 0.28)\n",
      "('action', 7, 'N', 0, 'P', 0.25, 'W', 0.0, 'Q', 0.0, 'U', 0.25, 'Q+U', 0.25)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1, -1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.68630624)\n",
      "('PREDICTED PROBS: ', array([ 0.29034367,  0.36074233,  0.34891403], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000101001001010000', '000101001001010100'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.26, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.52, 'Q+U', 0.55)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.58)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.34, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.51, 'Q+U', 0.5)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.51, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.5, 'Q+U', 0.57)\n",
      "('action', 8, 'N', 73, 'P', 0.11, 'W', 46.07, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.5, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.35, 'Q+U', 0.68)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.58)\n",
      "('action', 4, 'N', 26, 'P', 0.13, 'W', 20.74, 'Q', 0.8, 'U', 0.04, 'Q+U', 0.84)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.57, 'Q+U', 0.71)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.53, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.5, 'Q+U', 0.51)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.63, 'Q+U', 0.63)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 5, 'N', 25, 'P', 0.17, 'W', 20.37, 'Q', 0.81, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 2, 'N', 25, 'P', 0.21, 'W', 20.41, 'Q', 0.82, 'U', 0.04, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 6, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1, -1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37556517)\n",
      "('PREDICTED PROBS: ', array([ 0.17613721,  0.19929865,  0.24865176,  0.18592381,  0.18998858], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001000010100'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.26, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.52, 'Q+U', 0.55)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.58)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.34, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.52, 'Q+U', 0.5)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.51, 'Q+U', 0.54)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.57)\n",
      "('action', 8, 'N', 74, 'P', 0.11, 'W', 46.44, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.51, 'Q+U', 0.72)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.35, 'Q+U', 0.68)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.58)\n",
      "('action', 4, 'N', 27, 'P', 0.13, 'W', 21.12, 'Q', 0.78, 'U', 0.04, 'Q+U', 0.82)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.57, 'Q+U', 0.72)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.54, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.5, 'Q+U', 0.51)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.65, 'Q+U', 0.65)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 2, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 5, 'N', 26, 'P', 0.17, 'W', 20.74, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0,  1,  0, -1,  0,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.36475649)\n",
      "('PREDICTED PROBS: ', array([ 0.14507838,  0.16387309,  0.15745099,  0.20345452,  0.17516474,\n",
      "        0.15497823], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '001000001000010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.26, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.52, 'Q+U', 0.55)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.58)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.35, 'Q+U', 0.48)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.52, 'Q+U', 0.51)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.52, 'Q+U', 0.55)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.57)\n",
      "('action', 8, 'N', 75, 'P', 0.11, 'W', 46.81, 'Q', 0.62, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.51, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.35, 'Q+U', 0.68)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.58)\n",
      "('action', 4, 'N', 28, 'P', 0.13, 'W', 21.48, 'Q', 0.77, 'U', 0.04, 'Q+U', 0.81)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.58, 'Q+U', 0.72)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.54, 'Q+U', 0.69)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.5, 'Q+U', 0.51)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.66, 'Q+U', 0.66)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 2, 'N', 1, 'P', 0.17, 'W', 0.36, 'Q', 0.36, 'U', 0.44, 'Q+U', 0.8)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 5, 'N', 26, 'P', 0.17, 'W', 20.74, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 2, 'N', 25, 'P', 0.21, 'W', 20.41, 'Q', 0.82, 'U', 0.04, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.44, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0, -1,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.93, 'Q+U', 0.93)\n",
      "('action', 1, 'N', 1, 'P', 0.2, 'W', 0.45, 'Q', 0.45, 'U', 0.51, 'Q+U', 0.96)\n",
      "('action', 3, 'N', 2, 'P', 0.2, 'W', 1.4, 'Q', 0.7, 'U', 0.34, 'Q+U', 1.04)\n",
      "('action', 6, 'N', 21, 'P', 0.22, 'W', 18.08, 'Q', 0.86, 'U', 0.05, 'Q+U', 0.91)\n",
      "('action', 7, 'N', 0, 'P', 0.19, 'W', 0.0, 'Q', 0.0, 'U', 0.96, 'Q+U', 0.96)\n",
      "('chosen action...', 3)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 1, 'N', 0, 'P', 0.26, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('action', 6, 'N', 1, 'P', 0.28, 'W', 0.69, 'Q', 0.69, 'U', 0.2, 'Q+U', 0.88)\n",
      "('action', 7, 'N', 0, 'P', 0.25, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0, -1,  1, -1,  1, -1,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.29, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 1, 'N', 0, 'P', 0.36, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 7, 'N', 0, 'P', 0.35, 'W', 0.0, 'Q', 0.0, 'U', 0.35, 'Q+U', 0.35)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0,  1, -1,  1, -1,  1, -1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.65616852)\n",
      "('PREDICTED PROBS: ', array([ 0.49104178,  0.50895822], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001001010000', '000101001001010000', '000101001001010100', '010101001001010100'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.26, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.52, 'Q+U', 0.56)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.59)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.35, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.52, 'Q+U', 0.51)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.52, 'Q+U', 0.55)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.51, 'Q+U', 0.58)\n",
      "('action', 8, 'N', 76, 'P', 0.11, 'W', 47.46, 'Q', 0.62, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.51, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.35, 'Q+U', 0.68)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.59)\n",
      "('action', 4, 'N', 29, 'P', 0.13, 'W', 22.14, 'Q', 0.76, 'U', 0.04, 'Q+U', 0.8)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.58, 'Q+U', 0.73)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.54, 'Q+U', 0.7)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.51, 'Q+U', 0.52)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.67, 'Q+U', 0.67)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 2, 'N', 1, 'P', 0.17, 'W', 0.36, 'Q', 0.36, 'U', 0.45, 'Q+U', 0.81)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('action', 5, 'N', 27, 'P', 0.17, 'W', 21.4, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.82)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.45, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.61596084)\n",
      "('PREDICTED PROBS: ', array([ 0.14857976,  0.29174513,  0.1698802 ,  0.20584209,  0.18395282], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.27, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.53, 'Q+U', 0.56)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.59)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.35, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.53, 'Q+U', 0.51)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.52, 'Q+U', 0.55)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.58)\n",
      "('action', 8, 'N', 77, 'P', 0.11, 'W', 48.08, 'Q', 0.62, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.52, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.36, 'Q+U', 0.69)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.59)\n",
      "('action', 4, 'N', 30, 'P', 0.13, 'W', 22.76, 'Q', 0.76, 'U', 0.04, 'Q+U', 0.8)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.58, 'Q+U', 0.73)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.55, 'Q+U', 0.7)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.51, 'Q+U', 0.52)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.68, 'Q+U', 0.68)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 2, 'N', 1, 'P', 0.17, 'W', 0.36, 'Q', 0.36, 'U', 0.45, 'Q+U', 0.82)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 5, 'N', 28, 'P', 0.17, 'W', 22.01, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.82)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0,  0,  1,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 1, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 3, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.16, 'Q+U', 0.16)\n",
      "('action', 5, 'N', 0, 'P', 0.2, 'W', 0.0, 'Q', 0.0, 'U', 0.2, 'Q+U', 0.2)\n",
      "('action', 6, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('action', 7, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  1,  0, -1, -1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.37112954)\n",
      "('PREDICTED PROBS: ', array([ 0.17902395,  0.20726381,  0.19746712,  0.21429496,  0.20195016], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '001000001000010000', '001000001000011000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.27, 'Q+U', 0.36)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.53, 'Q+U', 0.56)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.59)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.35, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.53, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.53, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.58)\n",
      "('action', 8, 'N', 78, 'P', 0.11, 'W', 48.45, 'Q', 0.62, 'U', 0.01, 'Q+U', 0.63)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.52, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.36, 'Q+U', 0.69)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.59)\n",
      "('action', 4, 'N', 31, 'P', 0.13, 'W', 23.13, 'Q', 0.75, 'U', 0.04, 'Q+U', 0.78)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.59, 'Q+U', 0.73)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.55, 'Q+U', 0.7)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.51, 'Q+U', 0.52)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.69, 'Q+U', 0.69)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.31, 'Q+U', 0.68)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 5, 'N', 28, 'P', 0.17, 'W', 22.01, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.82)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 1, 'N', 1, 'P', 0.16, 'W', 0.62, 'Q', 0.62, 'U', 0.43, 'Q+U', 1.05)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.46, 'Q+U', 0.83)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.15, 'Q+U', 0.15)\n",
      "('action', 2, 'N', 0, 'P', 0.29, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.17, 'Q+U', 0.17)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.18, 'Q+U', 0.18)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.27, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.53, 'Q+U', 0.57)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.35, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.53, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.53, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.53, 'Q+U', 0.52)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.59)\n",
      "('action', 8, 'N', 79, 'P', 0.11, 'W', 49.45, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.52, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.36, 'Q+U', 0.69)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.6)\n",
      "('action', 4, 'N', 32, 'P', 0.13, 'W', 24.13, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.79)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.59, 'Q+U', 0.74)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.55, 'Q+U', 0.71)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.52, 'Q+U', 0.53)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.7, 'Q+U', 0.7)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.31, 'Q+U', 0.68)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 5, 'N', 29, 'P', 0.17, 'W', 23.01, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.72, 'Q+U', 0.72)\n",
      "('chosen action...', 6)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  1,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('PREDICTED VALUE: ', 0.35727564)\n",
      "('PREDICTED PROBS: ', array([ 0.15172201,  0.164589  ,  0.1807813 ,  0.16093844,  0.19298658,\n",
      "        0.14898272], dtype=float32))\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000000101000010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.27, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.54, 'Q+U', 0.57)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.36, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.53, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.53, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.52, 'Q+U', 0.59)\n",
      "('action', 8, 'N', 80, 'P', 0.11, 'W', 49.81, 'Q', 0.62, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.53, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.36, 'Q+U', 0.69)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.6)\n",
      "('action', 4, 'N', 33, 'P', 0.13, 'W', 24.48, 'Q', 0.74, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.6, 'Q+U', 0.74)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.56, 'Q+U', 0.71)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.52, 'Q+U', 0.53)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.71, 'Q+U', 0.71)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.32, 'Q+U', 0.68)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 5, 'N', 29, 'P', 0.17, 'W', 23.01, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.42, 'Q+U', 0.78)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 1, 'N', 2, 'P', 0.16, 'W', 1.62, 'Q', 0.81, 'U', 0.29, 'Q+U', 1.1)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.47, 'Q+U', 0.84)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.21, 'Q+U', 0.21)\n",
      "('action', 2, 'N', 1, 'P', 0.29, 'W', 1.0, 'Q', 1.0, 'U', 0.21, 'Q+U', 1.21)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.24, 'Q+U', 0.24)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.27, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.54, 'Q+U', 0.57)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.36, 'Q+U', 0.49)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.54, 'Q+U', 0.52)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.53, 'Q+U', 0.56)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.59)\n",
      "('action', 8, 'N', 81, 'P', 0.11, 'W', 50.81, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.53, 'Q+U', 0.75)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.37, 'Q+U', 0.7)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.6)\n",
      "('action', 4, 'N', 34, 'P', 0.13, 'W', 25.48, 'Q', 0.75, 'U', 0.03, 'Q+U', 0.78)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.6, 'Q+U', 0.74)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.56, 'Q+U', 0.72)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.52, 'Q+U', 0.53)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.73, 'Q+U', 0.73)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.32, 'Q+U', 0.69)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 5, 'N', 30, 'P', 0.17, 'W', 24.01, 'Q', 0.8, 'U', 0.03, 'Q+U', 0.83)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.43, 'Q+U', 0.79)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 1, 'N', 3, 'P', 0.16, 'W', 2.62, 'Q', 0.87, 'U', 0.22, 'Q+U', 1.1)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.47, 'Q+U', 0.85)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.26, 'Q+U', 0.26)\n",
      "('action', 2, 'N', 2, 'P', 0.29, 'W', 2.0, 'Q', 1.0, 'U', 0.17, 'Q+U', 1.17)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.29, 'Q+U', 0.29)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.32, 'Q+U', 0.32)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.27, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.54, 'Q+U', 0.57)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.6)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.36, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.54, 'Q+U', 0.53)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.54, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.53, 'Q+U', 0.53)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.59)\n",
      "('action', 8, 'N', 82, 'P', 0.11, 'W', 51.81, 'Q', 0.63, 'U', 0.01, 'Q+U', 0.64)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.53, 'Q+U', 0.75)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.37, 'Q+U', 0.7)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.61)\n",
      "('action', 4, 'N', 35, 'P', 0.13, 'W', 26.48, 'Q', 0.76, 'U', 0.03, 'Q+U', 0.79)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.6, 'Q+U', 0.75)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.56, 'Q+U', 0.72)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.53, 'Q+U', 0.54)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.74, 'Q+U', 0.74)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.33, 'Q+U', 0.69)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 5, 'N', 31, 'P', 0.17, 'W', 25.01, 'Q', 0.81, 'U', 0.03, 'Q+U', 0.84)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.43, 'Q+U', 0.79)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 1, 'N', 4, 'P', 0.16, 'W', 3.62, 'Q', 0.9, 'U', 0.18, 'Q+U', 1.09)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.48, 'Q+U', 0.86)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.87, 'Q+U', 0.87)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.3, 'Q+U', 0.3)\n",
      "('action', 2, 'N', 3, 'P', 0.29, 'W', 3.0, 'Q', 1.0, 'U', 0.15, 'Q+U', 1.15)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.34, 'Q+U', 0.34)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.37, 'Q+U', 0.37)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.27, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.55, 'Q+U', 0.58)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.61)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.36, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.54, 'Q+U', 0.53)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.54, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.53, 'Q+U', 0.6)\n",
      "('action', 8, 'N', 83, 'P', 0.11, 'W', 52.81, 'Q', 0.64, 'U', 0.01, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.54, 'Q+U', 0.75)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.37, 'Q+U', 0.7)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.55, 'Q+U', 0.61)\n",
      "('action', 4, 'N', 36, 'P', 0.13, 'W', 27.48, 'Q', 0.76, 'U', 0.03, 'Q+U', 0.8)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.61, 'Q+U', 0.75)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.57, 'Q+U', 0.72)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.53, 'Q+U', 0.54)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.75, 'Q+U', 0.75)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.33, 'Q+U', 0.7)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('action', 5, 'N', 32, 'P', 0.17, 'W', 26.01, 'Q', 0.81, 'U', 0.03, 'Q+U', 0.84)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.44, 'Q+U', 0.8)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 1, 'N', 5, 'P', 0.16, 'W', 4.62, 'Q', 0.92, 'U', 0.15, 'Q+U', 1.08)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.04, 'Q+U', 0.85)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.49, 'Q+U', 0.86)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.33, 'Q+U', 0.33)\n",
      "('action', 2, 'N', 4, 'P', 0.29, 'W', 4.0, 'Q', 1.0, 'U', 0.13, 'Q+U', 1.13)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.38, 'Q+U', 0.38)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.46, 'Q+U', 0.46)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.41, 'Q+U', 0.41)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.28, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.55, 'Q+U', 0.58)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.55, 'Q+U', 0.61)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.36, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.55, 'Q+U', 0.53)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.54, 'Q+U', 0.57)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.6)\n",
      "('action', 8, 'N', 84, 'P', 0.11, 'W', 53.81, 'Q', 0.64, 'U', 0.01, 'Q+U', 0.65)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.54, 'Q+U', 0.76)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.37, 'Q+U', 0.7)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.55, 'Q+U', 0.61)\n",
      "('action', 4, 'N', 37, 'P', 0.13, 'W', 28.48, 'Q', 0.77, 'U', 0.03, 'Q+U', 0.8)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.61, 'Q+U', 0.76)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.57, 'Q+U', 0.73)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.53, 'Q+U', 0.54)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.76, 'Q+U', 0.76)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.34, 'Q+U', 0.7)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.81, 'Q+U', 0.81)\n",
      "('action', 5, 'N', 33, 'P', 0.17, 'W', 27.01, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.45, 'Q+U', 0.8)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 1, 'N', 6, 'P', 0.16, 'W', 5.62, 'Q', 0.94, 'U', 0.13, 'Q+U', 1.07)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.05, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.5, 'Q+U', 0.87)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.36, 'Q+U', 0.36)\n",
      "('action', 2, 'N', 5, 'P', 0.29, 'W', 5.0, 'Q', 1.0, 'U', 0.12, 'Q+U', 1.12)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.5, 'Q+U', 0.5)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.28, 'Q+U', 0.37)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.55, 'Q+U', 0.58)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.55, 'Q+U', 0.61)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.37, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.55, 'Q+U', 0.54)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.55, 'Q+U', 0.58)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.6)\n",
      "('action', 8, 'N', 85, 'P', 0.11, 'W', 54.81, 'Q', 0.64, 'U', 0.01, 'Q+U', 0.66)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.54, 'Q+U', 0.76)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.37, 'Q+U', 0.7)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.55, 'Q+U', 0.62)\n",
      "('action', 4, 'N', 38, 'P', 0.13, 'W', 29.48, 'Q', 0.78, 'U', 0.03, 'Q+U', 0.81)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.61, 'Q+U', 0.76)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.57, 'Q+U', 0.73)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.54, 'Q+U', 0.55)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.77, 'Q+U', 0.77)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.34, 'Q+U', 0.71)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.82, 'Q+U', 0.82)\n",
      "('action', 5, 'N', 34, 'P', 0.17, 'W', 28.01, 'Q', 0.82, 'U', 0.03, 'Q+U', 0.85)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.45, 'Q+U', 0.81)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 1, 'N', 7, 'P', 0.16, 'W', 6.62, 'Q', 0.95, 'U', 0.12, 'Q+U', 1.06)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.05, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.88, 'Q+U', 0.88)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.5, 'Q+U', 0.88)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.91, 'Q+U', 0.91)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.39, 'Q+U', 0.39)\n",
      "('action', 2, 'N', 6, 'P', 0.29, 'W', 6.0, 'Q', 1.0, 'U', 0.11, 'Q+U', 1.11)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.54, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.49, 'Q+U', 0.49)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.28, 'Q+U', 0.38)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.55, 'Q+U', 0.59)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.55, 'Q+U', 0.62)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.37, 'Q+U', 0.5)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.55, 'Q+U', 0.54)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.55, 'Q+U', 0.58)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.55, 'Q+U', 0.54)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.61)\n",
      "('action', 8, 'N', 86, 'P', 0.11, 'W', 55.81, 'Q', 0.65, 'U', 0.01, 'Q+U', 0.66)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.55, 'Q+U', 0.76)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.38, 'Q+U', 0.71)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.56, 'Q+U', 0.62)\n",
      "('action', 4, 'N', 39, 'P', 0.13, 'W', 30.48, 'Q', 0.78, 'U', 0.03, 'Q+U', 0.81)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.62, 'Q+U', 0.76)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.58, 'Q+U', 0.73)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.54, 'Q+U', 0.55)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.78, 'Q+U', 0.78)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.34, 'Q+U', 0.71)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.83, 'Q+U', 0.83)\n",
      "('action', 5, 'N', 35, 'P', 0.17, 'W', 29.01, 'Q', 0.83, 'U', 0.03, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.46, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.85, 'Q+U', 0.85)\n",
      "('action', 1, 'N', 8, 'P', 0.16, 'W', 7.62, 'Q', 0.95, 'U', 0.11, 'Q+U', 1.06)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.05, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.89, 'Q+U', 0.89)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.51, 'Q+U', 0.89)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.92, 'Q+U', 0.92)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.42, 'Q+U', 0.42)\n",
      "('action', 2, 'N', 7, 'P', 0.29, 'W', 7.0, 'Q', 1.0, 'U', 0.1, 'Q+U', 1.1)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.48, 'Q+U', 0.48)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.58, 'Q+U', 0.58)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.52, 'Q+U', 0.52)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('current board...', array([0, 0, 0, 0, 0, 0, 0, 0, 0]))\n",
      "('action', 0, 'N', 3, 'P', 0.11, 'W', 0.29, 'Q', 0.1, 'U', 0.28, 'Q+U', 0.38)\n",
      "('action', 1, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.56, 'Q+U', 0.59)\n",
      "('action', 2, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.56, 'Q+U', 0.62)\n",
      "('action', 3, 'N', 2, 'P', 0.11, 'W', 0.27, 'Q', 0.14, 'U', 0.37, 'Q+U', 0.51)\n",
      "('action', 4, 'N', 1, 'P', 0.11, 'W', -0.01, 'Q', -0.01, 'U', 0.55, 'Q+U', 0.54)\n",
      "('action', 5, 'N', 1, 'P', 0.11, 'W', 0.03, 'Q', 0.03, 'U', 0.55, 'Q+U', 0.58)\n",
      "('action', 6, 'N', 1, 'P', 0.11, 'W', -0.0, 'Q', -0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('action', 7, 'N', 1, 'P', 0.11, 'W', 0.06, 'Q', 0.06, 'U', 0.54, 'Q+U', 0.61)\n",
      "('action', 8, 'N', 87, 'P', 0.11, 'W', 56.81, 'Q', 0.65, 'U', 0.01, 'Q+U', 0.67)\n",
      "('chosen action...', 8)\n",
      "('moving to...', array([0, 0, 0, 0, 0, 0, 0, 0, 1]))\n",
      "('action', 0, 'N', 1, 'P', 0.12, 'W', 0.22, 'Q', 0.22, 'U', 0.55, 'Q+U', 0.77)\n",
      "('action', 1, 'N', 2, 'P', 0.12, 'W', 0.66, 'Q', 0.33, 'U', 0.38, 'Q+U', 0.71)\n",
      "('action', 2, 'N', 39, 'P', 0.14, 'W', 23.88, 'Q', 0.61, 'U', 0.03, 'Q+U', 0.64)\n",
      "('action', 3, 'N', 1, 'P', 0.12, 'W', 0.06, 'Q', 0.06, 'U', 0.56, 'Q+U', 0.62)\n",
      "('action', 4, 'N', 40, 'P', 0.13, 'W', 31.48, 'Q', 0.79, 'U', 0.03, 'Q+U', 0.82)\n",
      "('action', 5, 'N', 1, 'P', 0.13, 'W', 0.15, 'Q', 0.15, 'U', 0.62, 'Q+U', 0.77)\n",
      "('action', 6, 'N', 1, 'P', 0.12, 'W', 0.15, 'Q', 0.15, 'U', 0.58, 'Q+U', 0.74)\n",
      "('action', 7, 'N', 1, 'P', 0.12, 'W', 0.01, 'Q', 0.01, 'U', 0.54, 'Q+U', 0.55)\n",
      "('chosen action...', 4)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  0,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.12, 'W', 0.0, 'Q', 0.0, 'U', 0.79, 'Q+U', 0.79)\n",
      "('action', 1, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 2, 'N', 2, 'P', 0.17, 'W', 0.74, 'Q', 0.37, 'U', 0.35, 'Q+U', 0.72)\n",
      "('action', 3, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.84, 'Q+U', 0.84)\n",
      "('action', 5, 'N', 36, 'P', 0.17, 'W', 30.01, 'Q', 0.83, 'U', 0.03, 'Q+U', 0.86)\n",
      "('action', 6, 'N', 1, 'P', 0.15, 'W', 0.36, 'Q', 0.36, 'U', 0.46, 'Q+U', 0.82)\n",
      "('action', 7, 'N', 0, 'P', 0.13, 'W', 0.0, 'Q', 0.0, 'U', 0.8, 'Q+U', 0.8)\n",
      "('chosen action...', 5)\n",
      "('moving to...', array([ 0,  0,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.14, 'W', 0.0, 'Q', 0.0, 'U', 0.86, 'Q+U', 0.86)\n",
      "('action', 1, 'N', 9, 'P', 0.16, 'W', 8.62, 'Q', 0.96, 'U', 0.1, 'Q+U', 1.06)\n",
      "('action', 2, 'N', 26, 'P', 0.21, 'W', 21.07, 'Q', 0.81, 'U', 0.05, 'Q+U', 0.86)\n",
      "('action', 3, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.9, 'Q+U', 0.9)\n",
      "('action', 6, 'N', 1, 'P', 0.17, 'W', 0.38, 'Q', 0.38, 'U', 0.52, 'Q+U', 0.89)\n",
      "('action', 7, 'N', 0, 'P', 0.16, 'W', 0.0, 'Q', 0.0, 'U', 0.94, 'Q+U', 0.94)\n",
      "('chosen action...', 1)\n",
      "('moving to...', array([ 0, -1,  0,  0, -1,  1,  0,  0,  1]))\n",
      "('action', 0, 'N', 0, 'P', 0.15, 'W', 0.0, 'Q', 0.0, 'U', 0.45, 'Q+U', 0.45)\n",
      "('action', 2, 'N', 8, 'P', 0.29, 'W', 8.0, 'Q', 1.0, 'U', 0.1, 'Q+U', 1.1)\n",
      "('action', 3, 'N', 0, 'P', 0.17, 'W', 0.0, 'Q', 0.0, 'U', 0.51, 'Q+U', 0.51)\n",
      "('action', 6, 'N', 0, 'P', 0.21, 'W', 0.0, 'Q', 0.0, 'U', 0.62, 'Q+U', 0.62)\n",
      "('action', 7, 'N', 0, 'P', 0.18, 'W', 0.0, 'Q', 0.0, 'U', 0.55, 'Q+U', 0.55)\n",
      "('chosen action...', 2)\n",
      "('moving to...', array([ 0, -1,  1,  0, -1,  1,  0,  0,  1]))\n",
      "THIS IS A LEAF\n",
      "('END GAME: reward = ', 1)\n",
      "('BREADCRUMBS...', ['000000000000000000', '000000001000000000', '000000001000010000', '000001001000010000', '000001001010010000', '001001001010010000'])\n",
      "---------\n",
      "('ACTION VALUES...', array([ 0.03030303,  0.01010101,  0.01010101,  0.02020202,  0.01010101,\n",
      "        0.01010101,  0.01010101,  0.01010101,  0.88888889]))\n",
      "('CHOSEN ACTION...', array([8]))\n"
     ]
    }
   ],
   "source": [
    "action, actionValues = player.act(state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 600,
   "metadata": {},
   "outputs": [],
   "source": [
    "next_state, reward, done, _ = env.step(action)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 601,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 1])"
      ]
     },
     "execution_count": 601,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "next_state.board"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 602,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.03030303,  0.01010101,  0.01010101,  0.02020202,  0.01010101,\n",
       "        0.01010101,  0.01010101,  0.01010101,  0.88888889])"
      ]
     },
     "execution_count": 602,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "actionValues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 603,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 603,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 606,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<module 'utils' from 'utils.pyc'>"
      ]
     },
     "execution_count": 606,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reload(game)\n",
    "reload(agent)\n",
    "reload(MCTS)\n",
    "reload(utils)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 608,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "deque([(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([ 0.03030303,  0.01010101,  0.01010101,  0.02020202,  0.01010101,\n",
      "        0.01010101,  0.01010101,  0.01010101,  0.88888889]), 1)], maxlen=2000)\n"
     ]
    }
   ],
   "source": [
    "player.remember(state.binary(), actionValues, state.playerTurn)\n",
    "print(player.memory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train only once enough experience has been collected: agent.replay draws\n",
    "# its minibatch with random.sample(self.memory, batch_size), which raises\n",
    "# ValueError ('sample larger than population') when the memory holds fewer\n",
    "# than batch_size entries.\n",
    "if len(player.memory) >= batch_size:\n",
    "    player.replay(batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deepreinforcement",
   "language": "python",
   "name": "deepreinforcement"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
